//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "AArch64.h"
17 #include "AArch64CallingConvention.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GetElementPtrTypeIterator.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/MachineValueType.h"
69 #include "llvm/Support/MathExtras.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75
76 using namespace llvm;
77
78 namespace {
79
80 class AArch64FastISel final : public FastISel {
81 class Address {
82 public:
83 using BaseKind = enum {
84 RegBase,
85 FrameIndexBase
86 };
87
88 private:
89 BaseKind Kind = RegBase;
90 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91 union {
92 unsigned Reg;
93 int FI;
94 } Base;
95 unsigned OffsetReg = 0;
96 unsigned Shift = 0;
97 int64_t Offset = 0;
98 const GlobalValue *GV = nullptr;
99
100 public:
Address()101 Address() { Base.Reg = 0; }
102
setKind(BaseKind K)103 void setKind(BaseKind K) { Kind = K; }
getKind() const104 BaseKind getKind() const { return Kind; }
setExtendType(AArch64_AM::ShiftExtendType E)105 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
getExtendType() const106 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
isRegBase() const107 bool isRegBase() const { return Kind == RegBase; }
isFIBase() const108 bool isFIBase() const { return Kind == FrameIndexBase; }
109
setReg(unsigned Reg)110 void setReg(unsigned Reg) {
111 assert(isRegBase() && "Invalid base register access!");
112 Base.Reg = Reg;
113 }
114
getReg() const115 unsigned getReg() const {
116 assert(isRegBase() && "Invalid base register access!");
117 return Base.Reg;
118 }
119
setOffsetReg(unsigned Reg)120 void setOffsetReg(unsigned Reg) {
121 OffsetReg = Reg;
122 }
123
getOffsetReg() const124 unsigned getOffsetReg() const {
125 return OffsetReg;
126 }
127
setFI(unsigned FI)128 void setFI(unsigned FI) {
129 assert(isFIBase() && "Invalid base frame index access!");
130 Base.FI = FI;
131 }
132
getFI() const133 unsigned getFI() const {
134 assert(isFIBase() && "Invalid base frame index access!");
135 return Base.FI;
136 }
137
setOffset(int64_t O)138 void setOffset(int64_t O) { Offset = O; }
getOffset()139 int64_t getOffset() { return Offset; }
setShift(unsigned S)140 void setShift(unsigned S) { Shift = S; }
getShift()141 unsigned getShift() { return Shift; }
142
setGlobalValue(const GlobalValue * G)143 void setGlobalValue(const GlobalValue *G) { GV = G; }
getGlobalValue()144 const GlobalValue *getGlobalValue() { return GV; }
145 };
146
147 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148 /// make the right decision when generating code for different targets.
149 const AArch64Subtarget *Subtarget;
150 LLVMContext *Context;
151
152 bool fastLowerArguments() override;
153 bool fastLowerCall(CallLoweringInfo &CLI) override;
154 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155
156 private:
157 // Selection routines.
158 bool selectAddSub(const Instruction *I);
159 bool selectLogicalOp(const Instruction *I);
160 bool selectLoad(const Instruction *I);
161 bool selectStore(const Instruction *I);
162 bool selectBranch(const Instruction *I);
163 bool selectIndirectBr(const Instruction *I);
164 bool selectCmp(const Instruction *I);
165 bool selectSelect(const Instruction *I);
166 bool selectFPExt(const Instruction *I);
167 bool selectFPTrunc(const Instruction *I);
168 bool selectFPToInt(const Instruction *I, bool Signed);
169 bool selectIntToFP(const Instruction *I, bool Signed);
170 bool selectRem(const Instruction *I, unsigned ISDOpcode);
171 bool selectRet(const Instruction *I);
172 bool selectTrunc(const Instruction *I);
173 bool selectIntExt(const Instruction *I);
174 bool selectMul(const Instruction *I);
175 bool selectShift(const Instruction *I);
176 bool selectBitCast(const Instruction *I);
177 bool selectFRem(const Instruction *I);
178 bool selectSDiv(const Instruction *I);
179 bool selectGetElementPtr(const Instruction *I);
180 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181
182 // Utility helper routines.
183 bool isTypeLegal(Type *Ty, MVT &VT);
184 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185 bool isValueAvailable(const Value *V) const;
186 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187 bool computeCallAddress(const Value *V, Address &Addr);
188 bool simplifyAddress(Address &Addr, MVT VT);
189 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190 MachineMemOperand::Flags Flags,
191 unsigned ScaleFactor, MachineMemOperand *MMO);
192 bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194 unsigned Alignment);
195 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196 const Value *Cond);
197 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198 bool optimizeSelect(const SelectInst *SI);
199 std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
200
201 // Emit helper routines.
202 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203 const Value *RHS, bool SetFlags = false,
204 bool WantResult = true, bool IsZExt = false);
205 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
207 bool SetFlags = false, bool WantResult = true);
208 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209 bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
210 bool WantResult = true);
211 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
213 AArch64_AM::ShiftExtendType ShiftType,
214 uint64_t ShiftImm, bool SetFlags = false,
215 bool WantResult = true);
216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
217 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
218 AArch64_AM::ShiftExtendType ExtType,
219 uint64_t ShiftImm, bool SetFlags = false,
220 bool WantResult = true);
221
222 // Emit functions.
223 bool emitCompareAndBranch(const BranchInst *BI);
224 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
227 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229 MachineMemOperand *MMO = nullptr);
230 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231 MachineMemOperand *MMO = nullptr);
232 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233 MachineMemOperand *MMO = nullptr);
234 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237 bool SetFlags = false, bool WantResult = true,
238 bool IsZExt = false);
239 unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
240 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241 bool SetFlags = false, bool WantResult = true,
242 bool IsZExt = false);
243 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
244 unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
245 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
246 unsigned RHSReg, bool RHSIsKill,
247 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
248 bool WantResult = true);
249 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
250 const Value *RHS);
251 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
252 bool LHSIsKill, uint64_t Imm);
253 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
254 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
255 uint64_t ShiftImm);
256 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
257 unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
258 unsigned Op1, bool Op1IsKill);
259 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
260 unsigned Op1, bool Op1IsKill);
261 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
262 unsigned Op1, bool Op1IsKill);
263 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
264 unsigned Op1Reg, bool Op1IsKill);
265 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
266 uint64_t Imm, bool IsZExt = true);
267 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
268 unsigned Op1Reg, bool Op1IsKill);
269 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
270 uint64_t Imm, bool IsZExt = true);
271 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
272 unsigned Op1Reg, bool Op1IsKill);
273 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
274 uint64_t Imm, bool IsZExt = false);
275
276 unsigned materializeInt(const ConstantInt *CI, MVT VT);
277 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
278 unsigned materializeGV(const GlobalValue *GV);
279
280 // Call handling routines.
281 private:
282 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
283 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
284 unsigned &NumBytes);
285 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
286
287 public:
288 // Backend specific FastISel code.
289 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
290 unsigned fastMaterializeConstant(const Constant *C) override;
291 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
292
AArch64FastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)293 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
294 const TargetLibraryInfo *LibInfo)
295 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
296 Subtarget =
297 &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
298 Context = &FuncInfo.Fn->getContext();
299 }
300
301 bool fastSelectInstruction(const Instruction *I) override;
302
303 #include "AArch64GenFastISel.inc"
304 };
305
306 } // end anonymous namespace
307
308 #include "AArch64GenCallingConv.inc"
309
310 /// Check if the sign-/zero-extend will be a noop.
isIntExtFree(const Instruction * I)311 static bool isIntExtFree(const Instruction *I) {
312 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
313 "Unexpected integer extend instruction.");
314 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
315 "Unexpected value type.");
316 bool IsZExt = isa<ZExtInst>(I);
317
318 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
319 if (LI->hasOneUse())
320 return true;
321
322 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
323 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
324 return true;
325
326 return false;
327 }
328
329 /// Determine the implicit scale factor that is applied by a memory
330 /// operation for a given value type.
getImplicitScaleFactor(MVT VT)331 static unsigned getImplicitScaleFactor(MVT VT) {
332 switch (VT.SimpleTy) {
333 default:
334 return 0; // invalid
335 case MVT::i1: // fall-through
336 case MVT::i8:
337 return 1;
338 case MVT::i16:
339 return 2;
340 case MVT::i32: // fall-through
341 case MVT::f32:
342 return 4;
343 case MVT::i64: // fall-through
344 case MVT::f64:
345 return 8;
346 }
347 }
348
CCAssignFnForCall(CallingConv::ID CC) const349 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
350 if (CC == CallingConv::WebKit_JS)
351 return CC_AArch64_WebKit_JS;
352 if (CC == CallingConv::GHC)
353 return CC_AArch64_GHC;
354 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
355 }
356
fastMaterializeAlloca(const AllocaInst * AI)357 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
358 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
359 "Alloca should always return a pointer.");
360
361 // Don't handle dynamic allocas.
362 if (!FuncInfo.StaticAllocaMap.count(AI))
363 return 0;
364
365 DenseMap<const AllocaInst *, int>::iterator SI =
366 FuncInfo.StaticAllocaMap.find(AI);
367
368 if (SI != FuncInfo.StaticAllocaMap.end()) {
369 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
370 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
371 ResultReg)
372 .addFrameIndex(SI->second)
373 .addImm(0)
374 .addImm(0);
375 return ResultReg;
376 }
377
378 return 0;
379 }
380
materializeInt(const ConstantInt * CI,MVT VT)381 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
382 if (VT > MVT::i64)
383 return 0;
384
385 if (!CI->isZero())
386 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
387
388 // Create a copy from the zero register to materialize a "0" value.
389 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
390 : &AArch64::GPR32RegClass;
391 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
392 unsigned ResultReg = createResultReg(RC);
393 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
394 ResultReg).addReg(ZeroReg, getKillRegState(true));
395 return ResultReg;
396 }
397
materializeFP(const ConstantFP * CFP,MVT VT)398 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
399 // Positive zero (+0.0) has to be materialized with a fmov from the zero
400 // register, because the immediate version of fmov cannot encode zero.
401 if (CFP->isNullValue())
402 return fastMaterializeFloatZero(CFP);
403
404 if (VT != MVT::f32 && VT != MVT::f64)
405 return 0;
406
407 const APFloat Val = CFP->getValueAPF();
408 bool Is64Bit = (VT == MVT::f64);
409 // This checks to see if we can use FMOV instructions to materialize
410 // a constant, otherwise we have to materialize via the constant pool.
411 if (TLI.isFPImmLegal(Val, VT)) {
412 int Imm =
413 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
414 assert((Imm != -1) && "Cannot encode floating-point constant.");
415 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
416 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
417 }
418
419 // For the MachO large code model materialize the FP constant in code.
420 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
421 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
422 const TargetRegisterClass *RC = Is64Bit ?
423 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
424
425 unsigned TmpReg = createResultReg(RC);
426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
427 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
428
429 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
430 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
431 TII.get(TargetOpcode::COPY), ResultReg)
432 .addReg(TmpReg, getKillRegState(true));
433
434 return ResultReg;
435 }
436
437 // Materialize via constant pool. MachineConstantPool wants an explicit
438 // alignment.
439 unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
440 if (Align == 0)
441 Align = DL.getTypeAllocSize(CFP->getType());
442
443 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
444 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
446 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
447
448 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
449 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
451 .addReg(ADRPReg)
452 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
453 return ResultReg;
454 }
455
materializeGV(const GlobalValue * GV)456 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
457 // We can't handle thread-local variables quickly yet.
458 if (GV->isThreadLocal())
459 return 0;
460
461 // MachO still uses GOT for large code-model accesses, but ELF requires
462 // movz/movk sequences, which FastISel doesn't handle yet.
463 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
464 return 0;
465
466 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
467
468 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
469 if (!DestEVT.isSimple())
470 return 0;
471
472 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
473 unsigned ResultReg;
474
475 if (OpFlags & AArch64II::MO_GOT) {
476 // ADRP + LDRX
477 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
478 ADRPReg)
479 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
480
481 ResultReg = createResultReg(&AArch64::GPR64RegClass);
482 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
483 ResultReg)
484 .addReg(ADRPReg)
485 .addGlobalAddress(GV, 0,
486 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
487 } else {
488 // ADRP + ADDX
489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
490 ADRPReg)
491 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
492
493 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
494 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
495 ResultReg)
496 .addReg(ADRPReg)
497 .addGlobalAddress(GV, 0,
498 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
499 .addImm(0);
500 }
501 return ResultReg;
502 }
503
fastMaterializeConstant(const Constant * C)504 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
505 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
506
507 // Only handle simple types.
508 if (!CEVT.isSimple())
509 return 0;
510 MVT VT = CEVT.getSimpleVT();
511
512 if (const auto *CI = dyn_cast<ConstantInt>(C))
513 return materializeInt(CI, VT);
514 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
515 return materializeFP(CFP, VT);
516 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
517 return materializeGV(GV);
518
519 return 0;
520 }
521
fastMaterializeFloatZero(const ConstantFP * CFP)522 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
523 assert(CFP->isNullValue() &&
524 "Floating-point constant is not a positive zero.");
525 MVT VT;
526 if (!isTypeLegal(CFP->getType(), VT))
527 return 0;
528
529 if (VT != MVT::f32 && VT != MVT::f64)
530 return 0;
531
532 bool Is64Bit = (VT == MVT::f64);
533 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
534 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
535 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
536 }
537
538 /// Check if the multiply is by a power-of-2 constant.
isMulPowOf2(const Value * I)539 static bool isMulPowOf2(const Value *I) {
540 if (const auto *MI = dyn_cast<MulOperator>(I)) {
541 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
542 if (C->getValue().isPowerOf2())
543 return true;
544 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
545 if (C->getValue().isPowerOf2())
546 return true;
547 }
548 return false;
549 }
550
// Computes the address to get to an object.
//
// Tries to describe the address of Obj in Addr by folding the computation
// that produces Obj into the addressing mode. The opcode of Obj is inspected
// when Obj is a constant expression, or an instruction that is either a
// static alloca or lives in the block currently being selected:
//   - bitcasts and no-op inttoptr/ptrtoint are looked through;
//   - GEPs with constant indices, and add/sub of a constant, are folded into
//     the constant offset;
//   - static allocas become a frame-index base;
//   - shl / mul by a power of two / and with 0xffffffff / zext / sext are
//     folded into the offset register together with a shift amount and a
//     shift/extend type.
// If nothing could be folded, Obj is materialized into a register that
// becomes the base register (when there is none yet) or else the offset
// register.
//
// Ty is the type whose size the shift amount is checked against in the
// shl/mul cases (and which must be 8 bits wide in the 'and' case); it may be
// null, in which case those folds are not applied.
//
// Returns true when Addr describes the address. Returns false on failure;
// note that on some failure paths Addr has already been modified.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  // UserOp1 acts as "no opcode to dispatch on": it matches no case below.
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  // Note: this local 'Ty' shadows the parameter, but only within this 'if'.
  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    // Keep a copy so that Addr can be restored if the base cannot be matched.
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Struct field: add the field's offset from the struct layout.
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        // Non-struct index: each step is scaled by the element's alloc size.
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    // Leave the switch; the generic register fallback below handles Obj.
    break;
  }
  case Instruction::Alloca: {
    // Static allocas are addressed directly through their frame index.
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize a constant operand to the RHS.
    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    // Neither operand is constant: try to match both operands into the same
    // address, and roll back if either of them fails.
    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    // Only one offset register can be recorded.
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    // Only shift amounts 1, 2 and 3 are considered.
    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    // The shift is folded only if it equals log2 of the byte size of Ty,
    // and only for types whose bit width is a power of two.
    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    // An 'and' with 0xffffffff on the shifted value is expressed as a UXTW
    // extend of the 32-bit sub-register of its other operand.
    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        // Canonicalize the mask to the RHS.
        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                             AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    // Same folding as the Shl case, for a multiply by a power of two.
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    // The equivalent shift amount; only 1, 2 and 3 are considered.
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    // As for Shl: the scale must match the byte size of Ty.
    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    // This fold is only applied when Ty is known and exactly 8 bits wide.
    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize the 0xffffffff mask to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        // NOTE(review): the LSL value set here is overwritten by the UXTW
        // assignment on the very next line, so this store has no effect.
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        // Use the low 32 bits of the unmasked value as the offset register.
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    // Only fold an extend into the offset register when a base register is
    // already set and the offset register is still free.
    // NOTE(review): getReg() asserts that the base is a register; confirm
    // this case cannot be reached with a frame-index base.
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    // Src stays null when the extend was not eligible for folding.
    if (!Src)
      break;

    Addr.setShift(0);
    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  // Nothing was folded. Materialize Obj in a register and use it as the base
  // register if the address has none yet ...
  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  // ... or else as the offset register if that slot is still free.
  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  // Both the base and the offset register are already taken.
  return false;
}
900
computeCallAddress(const Value * V,Address & Addr)901 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
902 const User *U = nullptr;
903 unsigned Opcode = Instruction::UserOp1;
904 bool InMBB = true;
905
906 if (const auto *I = dyn_cast<Instruction>(V)) {
907 Opcode = I->getOpcode();
908 U = I;
909 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
910 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
911 Opcode = C->getOpcode();
912 U = C;
913 }
914
915 switch (Opcode) {
916 default: break;
917 case Instruction::BitCast:
918 // Look past bitcasts if its operand is in the same BB.
919 if (InMBB)
920 return computeCallAddress(U->getOperand(0), Addr);
921 break;
922 case Instruction::IntToPtr:
923 // Look past no-op inttoptrs if its operand is in the same BB.
924 if (InMBB &&
925 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
926 TLI.getPointerTy(DL))
927 return computeCallAddress(U->getOperand(0), Addr);
928 break;
929 case Instruction::PtrToInt:
930 // Look past no-op ptrtoints if its operand is in the same BB.
931 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
932 return computeCallAddress(U->getOperand(0), Addr);
933 break;
934 }
935
936 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
937 Addr.setGlobalValue(GV);
938 return true;
939 }
940
941 // If all else fails, try to materialize the value in a register.
942 if (!Addr.getGlobalValue()) {
943 Addr.setReg(getRegForValue(V));
944 return Addr.getReg() != 0;
945 }
946
947 return false;
948 }
949
isTypeLegal(Type * Ty,MVT & VT)950 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
951 EVT evt = TLI.getValueType(DL, Ty, true);
952
953 // Only handle simple types.
954 if (evt == MVT::Other || !evt.isSimple())
955 return false;
956 VT = evt.getSimpleVT();
957
958 // This is a legal type, but it's not something we handle in fast-isel.
959 if (VT == MVT::f128)
960 return false;
961
962 // Handle all other legal types, i.e. a register that will directly hold this
963 // value.
964 return TLI.isTypeLegal(VT);
965 }
966
967 /// Determine if the value type is supported by FastISel.
968 ///
969 /// FastISel for AArch64 can handle more value types than are legal. This adds
970 /// simple value type such as i1, i8, and i16.
isTypeSupported(Type * Ty,MVT & VT,bool IsVectorAllowed)971 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
972 if (Ty->isVectorTy() && !IsVectorAllowed)
973 return false;
974
975 if (isTypeLegal(Ty, VT))
976 return true;
977
978 // If this is a type than can be sign or zero-extended to a basic operation
979 // go ahead and accept it now.
980 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
981 return true;
982
983 return false;
984 }
985
isValueAvailable(const Value * V) const986 bool AArch64FastISel::isValueAvailable(const Value *V) const {
987 if (!isa<Instruction>(V))
988 return true;
989
990 const auto *I = cast<Instruction>(V);
991 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
992 }
993
/// Rewrite \p Addr into a form that a single load/store of type \p VT can
/// encode, emitting extra instructions where necessary.
///
/// The steps, in order:
///  1. Decide whether the immediate offset and/or the offset register have to
///     be folded into the base register.
///  2. If the base is a frame index and any folding is needed (or an offset
///     register is present), materialize the frame address in a register.
///  3. Fold the offset register into the base register.
///  4. Fold the immediate offset into the base register.
///
/// \returns false if \p VT has no implicit scale factor or if one of the
/// helper emitters fails; true otherwise.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  // A negative offset, or one with any of the low (ScaleFactor - 1) bits set,
  // must fit in a signed 9-bit immediate.
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  // A positive offset without those low bits set must fit, after division by
  // ScaleFactor, in an unsigned 12-bit immediate.
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    // ADDXri of the frame index with two zero immediate operands.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      // Base register present: add the (extended or shifted) offset register
      // to it.
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW   )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: the shifted offset register becomes the base. The
      // source type and IsZExt flag follow the recorded extend type.
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    // The offset register has been folded away; clear every trace of it.
    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      // No base register: the offset constant itself becomes the base.
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
1086
addLoadStoreOperands(Address & Addr,const MachineInstrBuilder & MIB,MachineMemOperand::Flags Flags,unsigned ScaleFactor,MachineMemOperand * MMO)1087 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1088 const MachineInstrBuilder &MIB,
1089 MachineMemOperand::Flags Flags,
1090 unsigned ScaleFactor,
1091 MachineMemOperand *MMO) {
1092 int64_t Offset = Addr.getOffset() / ScaleFactor;
1093 // Frame base works a bit differently. Handle it separately.
1094 if (Addr.isFIBase()) {
1095 int FI = Addr.getFI();
1096 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1097 // and alignment should be based on the VT.
1098 MMO = FuncInfo.MF->getMachineMemOperand(
1099 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1100 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1101 // Now add the rest of the operands.
1102 MIB.addFrameIndex(FI).addImm(Offset);
1103 } else {
1104 assert(Addr.isRegBase() && "Unexpected address kind.");
1105 const MCInstrDesc &II = MIB->getDesc();
1106 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1107 Addr.setReg(
1108 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1109 Addr.setOffsetReg(
1110 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1111 if (Addr.getOffsetReg()) {
1112 assert(Addr.getOffset() == 0 && "Unexpected offset");
1113 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1114 Addr.getExtendType() == AArch64_AM::SXTX;
1115 MIB.addReg(Addr.getReg());
1116 MIB.addReg(Addr.getOffsetReg());
1117 MIB.addImm(IsSigned);
1118 MIB.addImm(Addr.getShift() != 0);
1119 } else
1120 MIB.addReg(Addr.getReg()).addImm(Offset);
1121 }
1122
1123 if (MMO)
1124 MIB.addMemOperand(MMO);
1125 }
1126
/// Emit an integer add (\p UseAdd) or subtract of \p LHS and \p RHS, trying
/// progressively more general instruction forms.
///
/// The forms are attempted in this order: immediate, extended register (only
/// for i8/i16, where an extend type exists), shifted register for a folded
/// multiply by a power of two, shifted register for a folded shift by a
/// constant, and finally plain register-register.
///
/// \param RetVT      Value type of the operation; i1/i8/i16 operands are
///                   extended and the operation is performed as i32.
/// \param SetFlags   Forwarded to the emitters to select the flag-setting
///                   opcodes.
/// \param WantResult Forwarded to the emitters; when false they write to the
///                   zero register instead of a fresh result register.
/// \param IsZExt     Selects zero- vs. sign-extension for narrow operands and
///                   for reading constant immediates.
/// \returns the result register, or 0 on failure (including unsupported
/// \p RetVT).
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult,  bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    // i1 needs extending but gets no ExtendType, so the extended-register
    // form below is never used for it.
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32:  // fall-through
  case MVT::i64:
    break;
  }
  // Remember the original type for the extends; the operation itself is done
  // on at least i32.
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // The canonicalizations below only apply to add: they swap the operands.

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl  ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr   )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  // First choice: immediate form.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    // A negative constant is handled by flipping add <-> sub and negating the
    // immediate.
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    // Any other constant that is a null value is treated as immediate 0.
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  // Note that this branch always returns, even when the emitter fails.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    // A left shift by a constant below 4 is folded into the extended-register
    // operand as well.
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-two constant ends up in MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // The multiply becomes a left shift by log2 of the constant.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  // Nothing could be folded: fall back to the register-register form, with
  // the RHS extended explicitly for narrow types.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
1280
emitAddSub_rr(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,bool SetFlags,bool WantResult)1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282 bool LHSIsKill, unsigned RHSReg,
1283 bool RHSIsKill, bool SetFlags,
1284 bool WantResult) {
1285 assert(LHSReg && RHSReg && "Invalid register number.");
1286
1287 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1288 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1289 return 0;
1290
1291 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1292 return 0;
1293
1294 static const unsigned OpcTable[2][2][2] = {
1295 { { AArch64::SUBWrr, AArch64::SUBXrr },
1296 { AArch64::ADDWrr, AArch64::ADDXrr } },
1297 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1298 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1299 };
1300 bool Is64Bit = RetVT == MVT::i64;
1301 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1302 const TargetRegisterClass *RC =
1303 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1304 unsigned ResultReg;
1305 if (WantResult)
1306 ResultReg = createResultReg(RC);
1307 else
1308 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1309
1310 const MCInstrDesc &II = TII.get(Opc);
1311 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1312 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1313 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1314 .addReg(LHSReg, getKillRegState(LHSIsKill))
1315 .addReg(RHSReg, getKillRegState(RHSIsKill));
1316 return ResultReg;
1317 }
1318
emitAddSub_ri(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm,bool SetFlags,bool WantResult)1319 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1320 bool LHSIsKill, uint64_t Imm,
1321 bool SetFlags, bool WantResult) {
1322 assert(LHSReg && "Invalid register number.");
1323
1324 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1325 return 0;
1326
1327 unsigned ShiftImm;
1328 if (isUInt<12>(Imm))
1329 ShiftImm = 0;
1330 else if ((Imm & 0xfff000) == Imm) {
1331 ShiftImm = 12;
1332 Imm >>= 12;
1333 } else
1334 return 0;
1335
1336 static const unsigned OpcTable[2][2][2] = {
1337 { { AArch64::SUBWri, AArch64::SUBXri },
1338 { AArch64::ADDWri, AArch64::ADDXri } },
1339 { { AArch64::SUBSWri, AArch64::SUBSXri },
1340 { AArch64::ADDSWri, AArch64::ADDSXri } }
1341 };
1342 bool Is64Bit = RetVT == MVT::i64;
1343 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1344 const TargetRegisterClass *RC;
1345 if (SetFlags)
1346 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1347 else
1348 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1349 unsigned ResultReg;
1350 if (WantResult)
1351 ResultReg = createResultReg(RC);
1352 else
1353 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1354
1355 const MCInstrDesc &II = TII.get(Opc);
1356 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1357 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1358 .addReg(LHSReg, getKillRegState(LHSIsKill))
1359 .addImm(Imm)
1360 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1361 return ResultReg;
1362 }
1363
emitAddSub_rs(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ShiftType,uint64_t ShiftImm,bool SetFlags,bool WantResult)1364 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1365 bool LHSIsKill, unsigned RHSReg,
1366 bool RHSIsKill,
1367 AArch64_AM::ShiftExtendType ShiftType,
1368 uint64_t ShiftImm, bool SetFlags,
1369 bool WantResult) {
1370 assert(LHSReg && RHSReg && "Invalid register number.");
1371 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1372 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1373
1374 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1375 return 0;
1376
1377 // Don't deal with undefined shifts.
1378 if (ShiftImm >= RetVT.getSizeInBits())
1379 return 0;
1380
1381 static const unsigned OpcTable[2][2][2] = {
1382 { { AArch64::SUBWrs, AArch64::SUBXrs },
1383 { AArch64::ADDWrs, AArch64::ADDXrs } },
1384 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1385 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1386 };
1387 bool Is64Bit = RetVT == MVT::i64;
1388 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1389 const TargetRegisterClass *RC =
1390 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1391 unsigned ResultReg;
1392 if (WantResult)
1393 ResultReg = createResultReg(RC);
1394 else
1395 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1396
1397 const MCInstrDesc &II = TII.get(Opc);
1398 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1399 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1400 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1401 .addReg(LHSReg, getKillRegState(LHSIsKill))
1402 .addReg(RHSReg, getKillRegState(RHSIsKill))
1403 .addImm(getShifterImm(ShiftType, ShiftImm));
1404 return ResultReg;
1405 }
1406
emitAddSub_rx(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ExtType,uint64_t ShiftImm,bool SetFlags,bool WantResult)1407 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1408 bool LHSIsKill, unsigned RHSReg,
1409 bool RHSIsKill,
1410 AArch64_AM::ShiftExtendType ExtType,
1411 uint64_t ShiftImm, bool SetFlags,
1412 bool WantResult) {
1413 assert(LHSReg && RHSReg && "Invalid register number.");
1414 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1415 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1416
1417 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1418 return 0;
1419
1420 if (ShiftImm >= 4)
1421 return 0;
1422
1423 static const unsigned OpcTable[2][2][2] = {
1424 { { AArch64::SUBWrx, AArch64::SUBXrx },
1425 { AArch64::ADDWrx, AArch64::ADDXrx } },
1426 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1427 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1428 };
1429 bool Is64Bit = RetVT == MVT::i64;
1430 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1431 const TargetRegisterClass *RC = nullptr;
1432 if (SetFlags)
1433 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1434 else
1435 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1436 unsigned ResultReg;
1437 if (WantResult)
1438 ResultReg = createResultReg(RC);
1439 else
1440 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1441
1442 const MCInstrDesc &II = TII.get(Opc);
1443 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1444 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1446 .addReg(LHSReg, getKillRegState(LHSIsKill))
1447 .addReg(RHSReg, getKillRegState(RHSIsKill))
1448 .addImm(getArithExtendImm(ExtType, ShiftImm));
1449 return ResultReg;
1450 }
1451
emitCmp(const Value * LHS,const Value * RHS,bool IsZExt)1452 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1453 Type *Ty = LHS->getType();
1454 EVT EVT = TLI.getValueType(DL, Ty, true);
1455 if (!EVT.isSimple())
1456 return false;
1457 MVT VT = EVT.getSimpleVT();
1458
1459 switch (VT.SimpleTy) {
1460 default:
1461 return false;
1462 case MVT::i1:
1463 case MVT::i8:
1464 case MVT::i16:
1465 case MVT::i32:
1466 case MVT::i64:
1467 return emitICmp(VT, LHS, RHS, IsZExt);
1468 case MVT::f32:
1469 case MVT::f64:
1470 return emitFCmp(VT, LHS, RHS);
1471 }
1472 }
1473
emitICmp(MVT RetVT,const Value * LHS,const Value * RHS,bool IsZExt)1474 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1475 bool IsZExt) {
1476 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1477 IsZExt) != 0;
1478 }
1479
emitICmp_ri(MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1480 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1481 uint64_t Imm) {
1482 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1483 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1484 }
1485
emitFCmp(MVT RetVT,const Value * LHS,const Value * RHS)1486 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1487 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1488 return false;
1489
1490 // Check to see if the 2nd operand is a constant that we can encode directly
1491 // in the compare.
1492 bool UseImm = false;
1493 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1494 if (CFP->isZero() && !CFP->isNegative())
1495 UseImm = true;
1496
1497 unsigned LHSReg = getRegForValue(LHS);
1498 if (!LHSReg)
1499 return false;
1500 bool LHSIsKill = hasTrivialKill(LHS);
1501
1502 if (UseImm) {
1503 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1505 .addReg(LHSReg, getKillRegState(LHSIsKill));
1506 return true;
1507 }
1508
1509 unsigned RHSReg = getRegForValue(RHS);
1510 if (!RHSReg)
1511 return false;
1512 bool RHSIsKill = hasTrivialKill(RHS);
1513
1514 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1515 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1516 .addReg(LHSReg, getKillRegState(LHSIsKill))
1517 .addReg(RHSReg, getKillRegState(RHSIsKill));
1518 return true;
1519 }
1520
emitAdd(MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1521 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1522 bool SetFlags, bool WantResult, bool IsZExt) {
1523 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1524 IsZExt);
1525 }
1526
1527 /// This method is a wrapper to simplify add emission.
1528 ///
1529 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1530 /// that fails, then try to materialize the immediate into a register and use
1531 /// emitAddSub_rr instead.
emitAdd_ri_(MVT VT,unsigned Op0,bool Op0IsKill,int64_t Imm)1532 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1533 int64_t Imm) {
1534 unsigned ResultReg;
1535 if (Imm < 0)
1536 ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1537 else
1538 ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1539
1540 if (ResultReg)
1541 return ResultReg;
1542
1543 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1544 if (!CReg)
1545 return 0;
1546
1547 ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1548 return ResultReg;
1549 }
1550
emitSub(MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1551 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1552 bool SetFlags, bool WantResult, bool IsZExt) {
1553 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1554 IsZExt);
1555 }
1556
emitSubs_rr(MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,bool WantResult)1557 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1558 bool LHSIsKill, unsigned RHSReg,
1559 bool RHSIsKill, bool WantResult) {
1560 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1561 RHSIsKill, /*SetFlags=*/true, WantResult);
1562 }
1563
emitSubs_rs(MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ShiftType,uint64_t ShiftImm,bool WantResult)1564 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1565 bool LHSIsKill, unsigned RHSReg,
1566 bool RHSIsKill,
1567 AArch64_AM::ShiftExtendType ShiftType,
1568 uint64_t ShiftImm, bool WantResult) {
1569 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1570 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1571 WantResult);
1572 }
1573
/// Emit the logical operation \p ISDOpc on \p LHS and \p RHS, trying
/// progressively more general instruction forms.
///
/// The forms are attempted in this order: logical immediate, shifted register
/// for a folded multiply by a power of two, shifted register for a folded
/// left shift by a constant, and finally plain register-register via
/// fastEmit_rr.
///
/// \param ISDOpc ISD opcode of the operation; the helpers it is forwarded to
///               index their opcode tables with (ISDOpc - ISD::AND).
/// \param RetVT  Value type of the operation.
/// \returns the result register, or 0 if an operand could not be placed in a
/// register.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  // First choice: immediate form. A zero result means the immediate could not
  // be used and the next form is tried.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      // Make sure the power-of-two constant ends up in MulRHS.
      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // The multiply becomes a left shift by log2 of the constant.
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  // Nothing could be folded: fall back to the register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  // The operation is performed on at least i32.
  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  // For i8/i16 the result is masked back down to the narrow width.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}
1657
emitLogicalOp_ri(unsigned ISDOpc,MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1658 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1659 unsigned LHSReg, bool LHSIsKill,
1660 uint64_t Imm) {
1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1662 "ISD nodes are not consecutive!");
1663 static const unsigned OpcTable[3][2] = {
1664 { AArch64::ANDWri, AArch64::ANDXri },
1665 { AArch64::ORRWri, AArch64::ORRXri },
1666 { AArch64::EORWri, AArch64::EORXri }
1667 };
1668 const TargetRegisterClass *RC;
1669 unsigned Opc;
1670 unsigned RegSize;
1671 switch (RetVT.SimpleTy) {
1672 default:
1673 return 0;
1674 case MVT::i1:
1675 case MVT::i8:
1676 case MVT::i16:
1677 case MVT::i32: {
1678 unsigned Idx = ISDOpc - ISD::AND;
1679 Opc = OpcTable[Idx][0];
1680 RC = &AArch64::GPR32spRegClass;
1681 RegSize = 32;
1682 break;
1683 }
1684 case MVT::i64:
1685 Opc = OpcTable[ISDOpc - ISD::AND][1];
1686 RC = &AArch64::GPR64spRegClass;
1687 RegSize = 64;
1688 break;
1689 }
1690
1691 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1692 return 0;
1693
1694 unsigned ResultReg =
1695 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1696 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1700 }
1701 return ResultReg;
1702 }
1703
emitLogicalOp_rs(unsigned ISDOpc,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,uint64_t ShiftImm)1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1705 unsigned LHSReg, bool LHSIsKill,
1706 unsigned RHSReg, bool RHSIsKill,
1707 uint64_t ShiftImm) {
1708 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709 "ISD nodes are not consecutive!");
1710 static const unsigned OpcTable[3][2] = {
1711 { AArch64::ANDWrs, AArch64::ANDXrs },
1712 { AArch64::ORRWrs, AArch64::ORRXrs },
1713 { AArch64::EORWrs, AArch64::EORXrs }
1714 };
1715
1716 // Don't deal with undefined shifts.
1717 if (ShiftImm >= RetVT.getSizeInBits())
1718 return 0;
1719
1720 const TargetRegisterClass *RC;
1721 unsigned Opc;
1722 switch (RetVT.SimpleTy) {
1723 default:
1724 return 0;
1725 case MVT::i1:
1726 case MVT::i8:
1727 case MVT::i16:
1728 case MVT::i32:
1729 Opc = OpcTable[ISDOpc - ISD::AND][0];
1730 RC = &AArch64::GPR32RegClass;
1731 break;
1732 case MVT::i64:
1733 Opc = OpcTable[ISDOpc - ISD::AND][1];
1734 RC = &AArch64::GPR64RegClass;
1735 break;
1736 }
1737 unsigned ResultReg =
1738 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1739 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1743 }
1744 return ResultReg;
1745 }
1746
emitAnd_ri(MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1748 uint64_t Imm) {
1749 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1750 }
1751
emitLoad(MVT VT,MVT RetVT,Address Addr,bool WantZExt,MachineMemOperand * MMO)1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753 bool WantZExt, MachineMemOperand *MMO) {
1754 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755 return 0;
1756
1757 // Simplify this down to something we can handle.
1758 if (!simplifyAddress(Addr, VT))
1759 return 0;
1760
1761 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762 if (!ScaleFactor)
1763 llvm_unreachable("Unexpected value type.");
1764
1765 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1767 bool UseScaled = true;
1768 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769 UseScaled = false;
1770 ScaleFactor = 1;
1771 }
1772
1773 static const unsigned GPOpcTable[2][8][4] = {
1774 // Sign-extend.
1775 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776 AArch64::LDURXi },
1777 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778 AArch64::LDURXi },
1779 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780 AArch64::LDRXui },
1781 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782 AArch64::LDRXui },
1783 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784 AArch64::LDRXroX },
1785 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786 AArch64::LDRXroX },
1787 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788 AArch64::LDRXroW },
1789 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790 AArch64::LDRXroW }
1791 },
1792 // Zero-extend.
1793 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794 AArch64::LDURXi },
1795 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796 AArch64::LDURXi },
1797 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798 AArch64::LDRXui },
1799 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800 AArch64::LDRXui },
1801 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802 AArch64::LDRXroX },
1803 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804 AArch64::LDRXroX },
1805 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806 AArch64::LDRXroW },
1807 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808 AArch64::LDRXroW }
1809 }
1810 };
1811
1812 static const unsigned FPOpcTable[4][2] = {
1813 { AArch64::LDURSi, AArch64::LDURDi },
1814 { AArch64::LDRSui, AArch64::LDRDui },
1815 { AArch64::LDRSroX, AArch64::LDRDroX },
1816 { AArch64::LDRSroW, AArch64::LDRDroW }
1817 };
1818
1819 unsigned Opc;
1820 const TargetRegisterClass *RC;
1821 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822 Addr.getOffsetReg();
1823 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825 Addr.getExtendType() == AArch64_AM::SXTW)
1826 Idx++;
1827
1828 bool IsRet64Bit = RetVT == MVT::i64;
1829 switch (VT.SimpleTy) {
1830 default:
1831 llvm_unreachable("Unexpected value type.");
1832 case MVT::i1: // Intentional fall-through.
1833 case MVT::i8:
1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835 RC = (IsRet64Bit && !WantZExt) ?
1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 break;
1838 case MVT::i16:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i32:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845 RC = (IsRet64Bit && !WantZExt) ?
1846 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847 break;
1848 case MVT::i64:
1849 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850 RC = &AArch64::GPR64RegClass;
1851 break;
1852 case MVT::f32:
1853 Opc = FPOpcTable[Idx][0];
1854 RC = &AArch64::FPR32RegClass;
1855 break;
1856 case MVT::f64:
1857 Opc = FPOpcTable[Idx][1];
1858 RC = &AArch64::FPR64RegClass;
1859 break;
1860 }
1861
1862 // Create the base instruction, then add the operands.
1863 unsigned ResultReg = createResultReg(RC);
1864 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1865 TII.get(Opc), ResultReg);
1866 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867
1868 // Loading an i1 requires special handling.
1869 if (VT == MVT::i1) {
1870 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1871 assert(ANDReg && "Unexpected AND instruction emission failure.");
1872 ResultReg = ANDReg;
1873 }
1874
1875 // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876 // the 32bit reg to a 64bit reg.
1877 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1880 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881 .addImm(0)
1882 .addReg(ResultReg, getKillRegState(true))
1883 .addImm(AArch64::sub_32);
1884 ResultReg = Reg64;
1885 }
1886 return ResultReg;
1887 }
1888
selectAddSub(const Instruction * I)1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890 MVT VT;
1891 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892 return false;
1893
1894 if (VT.isVector())
1895 return selectOperator(I, I->getOpcode());
1896
1897 unsigned ResultReg;
1898 switch (I->getOpcode()) {
1899 default:
1900 llvm_unreachable("Unexpected instruction.");
1901 case Instruction::Add:
1902 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903 break;
1904 case Instruction::Sub:
1905 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906 break;
1907 }
1908 if (!ResultReg)
1909 return false;
1910
1911 updateValueMap(I, ResultReg);
1912 return true;
1913 }
1914
selectLogicalOp(const Instruction * I)1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916 MVT VT;
1917 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918 return false;
1919
1920 if (VT.isVector())
1921 return selectOperator(I, I->getOpcode());
1922
1923 unsigned ResultReg;
1924 switch (I->getOpcode()) {
1925 default:
1926 llvm_unreachable("Unexpected instruction.");
1927 case Instruction::And:
1928 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929 break;
1930 case Instruction::Or:
1931 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932 break;
1933 case Instruction::Xor:
1934 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935 break;
1936 }
1937 if (!ResultReg)
1938 return false;
1939
1940 updateValueMap(I, ResultReg);
1941 return true;
1942 }
1943
selectLoad(const Instruction * I)1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945 MVT VT;
1946 // Verify we have a legal type before going any further. Currently, we handle
1947 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950 cast<LoadInst>(I)->isAtomic())
1951 return false;
1952
1953 const Value *SV = I->getOperand(0);
1954 if (TLI.supportSwiftError()) {
1955 // Swifterror values can come from either a function parameter with
1956 // swifterror attribute or an alloca with swifterror attribute.
1957 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958 if (Arg->hasSwiftErrorAttr())
1959 return false;
1960 }
1961
1962 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963 if (Alloca->isSwiftError())
1964 return false;
1965 }
1966 }
1967
1968 // See if we can handle this address.
1969 Address Addr;
1970 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971 return false;
1972
1973 // Fold the following sign-/zero-extend into the load instruction.
1974 bool WantZExt = true;
1975 MVT RetVT = VT;
1976 const Value *IntExtVal = nullptr;
1977 if (I->hasOneUse()) {
1978 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979 if (isTypeSupported(ZE->getType(), RetVT))
1980 IntExtVal = ZE;
1981 else
1982 RetVT = VT;
1983 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984 if (isTypeSupported(SE->getType(), RetVT))
1985 IntExtVal = SE;
1986 else
1987 RetVT = VT;
1988 WantZExt = false;
1989 }
1990 }
1991
1992 unsigned ResultReg =
1993 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994 if (!ResultReg)
1995 return false;
1996
1997 // There are a few different cases we have to handle, because the load or the
1998 // sign-/zero-extend might not be selected by FastISel if we fall-back to
1999 // SelectionDAG. There is also an ordering issue when both instructions are in
2000 // different basic blocks.
2001 // 1.) The load instruction is selected by FastISel, but the integer extend
2002 // not. This usually happens when the integer extend is in a different
2003 // basic block and SelectionDAG took over for that basic block.
2004 // 2.) The load instruction is selected before the integer extend. This only
2005 // happens when the integer extend is in a different basic block.
2006 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007 // by FastISel. This happens if there are instructions between the load
2008 // and the integer extend that couldn't be selected by FastISel.
2009 if (IntExtVal) {
2010 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012 // it when it selects the integer extend.
2013 unsigned Reg = lookUpRegForValue(IntExtVal);
2014 auto *MI = MRI.getUniqueVRegDef(Reg);
2015 if (!MI) {
2016 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017 if (WantZExt) {
2018 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020 ResultReg = std::prev(I)->getOperand(0).getReg();
2021 removeDeadCode(I, std::next(I));
2022 } else
2023 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024 /*IsKill=*/true,
2025 AArch64::sub_32);
2026 }
2027 updateValueMap(I, ResultReg);
2028 return true;
2029 }
2030
2031 // The integer extend has already been emitted - delete all the instructions
2032 // that have been emitted by the integer extend lowering code and use the
2033 // result from the load instruction directly.
2034 while (MI) {
2035 Reg = 0;
2036 for (auto &Opnd : MI->uses()) {
2037 if (Opnd.isReg()) {
2038 Reg = Opnd.getReg();
2039 break;
2040 }
2041 }
2042 MachineBasicBlock::iterator I(MI);
2043 removeDeadCode(I, std::next(I));
2044 MI = nullptr;
2045 if (Reg)
2046 MI = MRI.getUniqueVRegDef(Reg);
2047 }
2048 updateValueMap(IntExtVal, ResultReg);
2049 return true;
2050 }
2051
2052 updateValueMap(I, ResultReg);
2053 return true;
2054 }
2055
emitStoreRelease(MVT VT,unsigned SrcReg,unsigned AddrReg,MachineMemOperand * MMO)2056 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2057 unsigned AddrReg,
2058 MachineMemOperand *MMO) {
2059 unsigned Opc;
2060 switch (VT.SimpleTy) {
2061 default: return false;
2062 case MVT::i8: Opc = AArch64::STLRB; break;
2063 case MVT::i16: Opc = AArch64::STLRH; break;
2064 case MVT::i32: Opc = AArch64::STLRW; break;
2065 case MVT::i64: Opc = AArch64::STLRX; break;
2066 }
2067
2068 const MCInstrDesc &II = TII.get(Opc);
2069 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2070 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2071 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2072 .addReg(SrcReg)
2073 .addReg(AddrReg)
2074 .addMemOperand(MMO);
2075 return true;
2076 }
2077
emitStore(MVT VT,unsigned SrcReg,Address Addr,MachineMemOperand * MMO)2078 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2079 MachineMemOperand *MMO) {
2080 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2081 return false;
2082
2083 // Simplify this down to something we can handle.
2084 if (!simplifyAddress(Addr, VT))
2085 return false;
2086
2087 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2088 if (!ScaleFactor)
2089 llvm_unreachable("Unexpected value type.");
2090
2091 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2092 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2093 bool UseScaled = true;
2094 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2095 UseScaled = false;
2096 ScaleFactor = 1;
2097 }
2098
2099 static const unsigned OpcTable[4][6] = {
2100 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2101 AArch64::STURSi, AArch64::STURDi },
2102 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2103 AArch64::STRSui, AArch64::STRDui },
2104 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2105 AArch64::STRSroX, AArch64::STRDroX },
2106 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2107 AArch64::STRSroW, AArch64::STRDroW }
2108 };
2109
2110 unsigned Opc;
2111 bool VTIsi1 = false;
2112 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2113 Addr.getOffsetReg();
2114 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2115 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2116 Addr.getExtendType() == AArch64_AM::SXTW)
2117 Idx++;
2118
2119 switch (VT.SimpleTy) {
2120 default: llvm_unreachable("Unexpected value type.");
2121 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
2122 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2123 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2124 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2125 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2126 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2127 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2128 }
2129
2130 // Storing an i1 requires special handling.
2131 if (VTIsi1 && SrcReg != AArch64::WZR) {
2132 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2133 assert(ANDReg && "Unexpected AND instruction emission failure.");
2134 SrcReg = ANDReg;
2135 }
2136 // Create the base instruction, then add the operands.
2137 const MCInstrDesc &II = TII.get(Opc);
2138 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2139 MachineInstrBuilder MIB =
2140 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2141 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2142
2143 return true;
2144 }
2145
selectStore(const Instruction * I)2146 bool AArch64FastISel::selectStore(const Instruction *I) {
2147 MVT VT;
2148 const Value *Op0 = I->getOperand(0);
2149 // Verify we have a legal type before going any further. Currently, we handle
2150 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2151 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2152 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2153 return false;
2154
2155 const Value *PtrV = I->getOperand(1);
2156 if (TLI.supportSwiftError()) {
2157 // Swifterror values can come from either a function parameter with
2158 // swifterror attribute or an alloca with swifterror attribute.
2159 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2160 if (Arg->hasSwiftErrorAttr())
2161 return false;
2162 }
2163
2164 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2165 if (Alloca->isSwiftError())
2166 return false;
2167 }
2168 }
2169
2170 // Get the value to be stored into a register. Use the zero register directly
2171 // when possible to avoid an unnecessary copy and a wasted register.
2172 unsigned SrcReg = 0;
2173 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2174 if (CI->isZero())
2175 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2177 if (CF->isZero() && !CF->isNegative()) {
2178 VT = MVT::getIntegerVT(VT.getSizeInBits());
2179 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2180 }
2181 }
2182
2183 if (!SrcReg)
2184 SrcReg = getRegForValue(Op0);
2185
2186 if (!SrcReg)
2187 return false;
2188
2189 auto *SI = cast<StoreInst>(I);
2190
2191 // Try to emit a STLR for seq_cst/release.
2192 if (SI->isAtomic()) {
2193 AtomicOrdering Ord = SI->getOrdering();
2194 // The non-atomic instructions are sufficient for relaxed stores.
2195 if (isReleaseOrStronger(Ord)) {
2196 // The STLR addressing mode only supports a base reg; pass that directly.
2197 unsigned AddrReg = getRegForValue(PtrV);
2198 return emitStoreRelease(VT, SrcReg, AddrReg,
2199 createMachineMemOperandFor(I));
2200 }
2201 }
2202
2203 // See if we can handle this address.
2204 Address Addr;
2205 if (!computeAddress(PtrV, Addr, Op0->getType()))
2206 return false;
2207
2208 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2209 return false;
2210 return true;
2211 }
2212
getCompareCC(CmpInst::Predicate Pred)2213 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2214 switch (Pred) {
2215 case CmpInst::FCMP_ONE:
2216 case CmpInst::FCMP_UEQ:
2217 default:
2218 // AL is our "false" for now. The other two need more compares.
2219 return AArch64CC::AL;
2220 case CmpInst::ICMP_EQ:
2221 case CmpInst::FCMP_OEQ:
2222 return AArch64CC::EQ;
2223 case CmpInst::ICMP_SGT:
2224 case CmpInst::FCMP_OGT:
2225 return AArch64CC::GT;
2226 case CmpInst::ICMP_SGE:
2227 case CmpInst::FCMP_OGE:
2228 return AArch64CC::GE;
2229 case CmpInst::ICMP_UGT:
2230 case CmpInst::FCMP_UGT:
2231 return AArch64CC::HI;
2232 case CmpInst::FCMP_OLT:
2233 return AArch64CC::MI;
2234 case CmpInst::ICMP_ULE:
2235 case CmpInst::FCMP_OLE:
2236 return AArch64CC::LS;
2237 case CmpInst::FCMP_ORD:
2238 return AArch64CC::VC;
2239 case CmpInst::FCMP_UNO:
2240 return AArch64CC::VS;
2241 case CmpInst::FCMP_UGE:
2242 return AArch64CC::PL;
2243 case CmpInst::ICMP_SLT:
2244 case CmpInst::FCMP_ULT:
2245 return AArch64CC::LT;
2246 case CmpInst::ICMP_SLE:
2247 case CmpInst::FCMP_ULE:
2248 return AArch64CC::LE;
2249 case CmpInst::FCMP_UNE:
2250 case CmpInst::ICMP_NE:
2251 return AArch64CC::NE;
2252 case CmpInst::ICMP_UGE:
2253 return AArch64CC::HS;
2254 case CmpInst::ICMP_ULT:
2255 return AArch64CC::LO;
2256 }
2257 }
2258
2259 /// Try to emit a combined compare-and-branch instruction.
emitCompareAndBranch(const BranchInst * BI)2260 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2261 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2262 // will not be produced, as they are conditional branch instructions that do
2263 // not set flags.
2264 if (FuncInfo.MF->getFunction().hasFnAttribute(
2265 Attribute::SpeculativeLoadHardening))
2266 return false;
2267
2268 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2269 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2270 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2271
2272 const Value *LHS = CI->getOperand(0);
2273 const Value *RHS = CI->getOperand(1);
2274
2275 MVT VT;
2276 if (!isTypeSupported(LHS->getType(), VT))
2277 return false;
2278
2279 unsigned BW = VT.getSizeInBits();
2280 if (BW > 64)
2281 return false;
2282
2283 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2284 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2285
2286 // Try to take advantage of fallthrough opportunities.
2287 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2288 std::swap(TBB, FBB);
2289 Predicate = CmpInst::getInversePredicate(Predicate);
2290 }
2291
2292 int TestBit = -1;
2293 bool IsCmpNE;
2294 switch (Predicate) {
2295 default:
2296 return false;
2297 case CmpInst::ICMP_EQ:
2298 case CmpInst::ICMP_NE:
2299 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2300 std::swap(LHS, RHS);
2301
2302 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2303 return false;
2304
2305 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2306 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2307 const Value *AndLHS = AI->getOperand(0);
2308 const Value *AndRHS = AI->getOperand(1);
2309
2310 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2311 if (C->getValue().isPowerOf2())
2312 std::swap(AndLHS, AndRHS);
2313
2314 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2315 if (C->getValue().isPowerOf2()) {
2316 TestBit = C->getValue().logBase2();
2317 LHS = AndLHS;
2318 }
2319 }
2320
2321 if (VT == MVT::i1)
2322 TestBit = 0;
2323
2324 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2325 break;
2326 case CmpInst::ICMP_SLT:
2327 case CmpInst::ICMP_SGE:
2328 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2329 return false;
2330
2331 TestBit = BW - 1;
2332 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2333 break;
2334 case CmpInst::ICMP_SGT:
2335 case CmpInst::ICMP_SLE:
2336 if (!isa<ConstantInt>(RHS))
2337 return false;
2338
2339 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2340 return false;
2341
2342 TestBit = BW - 1;
2343 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2344 break;
2345 } // end switch
2346
2347 static const unsigned OpcTable[2][2][2] = {
2348 { {AArch64::CBZW, AArch64::CBZX },
2349 {AArch64::CBNZW, AArch64::CBNZX} },
2350 { {AArch64::TBZW, AArch64::TBZX },
2351 {AArch64::TBNZW, AArch64::TBNZX} }
2352 };
2353
2354 bool IsBitTest = TestBit != -1;
2355 bool Is64Bit = BW == 64;
2356 if (TestBit < 32 && TestBit >= 0)
2357 Is64Bit = false;
2358
2359 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2360 const MCInstrDesc &II = TII.get(Opc);
2361
2362 unsigned SrcReg = getRegForValue(LHS);
2363 if (!SrcReg)
2364 return false;
2365 bool SrcIsKill = hasTrivialKill(LHS);
2366
2367 if (BW == 64 && !Is64Bit)
2368 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2369 AArch64::sub_32);
2370
2371 if ((BW < 32) && !IsBitTest)
2372 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2373
2374 // Emit the combined compare and branch instruction.
2375 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2376 MachineInstrBuilder MIB =
2377 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2378 .addReg(SrcReg, getKillRegState(SrcIsKill));
2379 if (IsBitTest)
2380 MIB.addImm(TestBit);
2381 MIB.addMBB(TBB);
2382
2383 finishCondBranch(BI->getParent(), TBB, FBB);
2384 return true;
2385 }
2386
selectBranch(const Instruction * I)2387 bool AArch64FastISel::selectBranch(const Instruction *I) {
2388 const BranchInst *BI = cast<BranchInst>(I);
2389 if (BI->isUnconditional()) {
2390 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2391 fastEmitBranch(MSucc, BI->getDebugLoc());
2392 return true;
2393 }
2394
2395 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2396 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2397
2398 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2399 if (CI->hasOneUse() && isValueAvailable(CI)) {
2400 // Try to optimize or fold the cmp.
2401 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2402 switch (Predicate) {
2403 default:
2404 break;
2405 case CmpInst::FCMP_FALSE:
2406 fastEmitBranch(FBB, DbgLoc);
2407 return true;
2408 case CmpInst::FCMP_TRUE:
2409 fastEmitBranch(TBB, DbgLoc);
2410 return true;
2411 }
2412
2413 // Try to emit a combined compare-and-branch first.
2414 if (emitCompareAndBranch(BI))
2415 return true;
2416
2417 // Try to take advantage of fallthrough opportunities.
2418 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2419 std::swap(TBB, FBB);
2420 Predicate = CmpInst::getInversePredicate(Predicate);
2421 }
2422
2423 // Emit the cmp.
2424 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2425 return false;
2426
2427 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2428 // instruction.
2429 AArch64CC::CondCode CC = getCompareCC(Predicate);
2430 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2431 switch (Predicate) {
2432 default:
2433 break;
2434 case CmpInst::FCMP_UEQ:
2435 ExtraCC = AArch64CC::EQ;
2436 CC = AArch64CC::VS;
2437 break;
2438 case CmpInst::FCMP_ONE:
2439 ExtraCC = AArch64CC::MI;
2440 CC = AArch64CC::GT;
2441 break;
2442 }
2443 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2444
2445 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2446 if (ExtraCC != AArch64CC::AL) {
2447 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2448 .addImm(ExtraCC)
2449 .addMBB(TBB);
2450 }
2451
2452 // Emit the branch.
2453 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2454 .addImm(CC)
2455 .addMBB(TBB);
2456
2457 finishCondBranch(BI->getParent(), TBB, FBB);
2458 return true;
2459 }
2460 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2461 uint64_t Imm = CI->getZExtValue();
2462 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2463 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2464 .addMBB(Target);
2465
2466 // Obtain the branch probability and add the target to the successor list.
2467 if (FuncInfo.BPI) {
2468 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2469 BI->getParent(), Target->getBasicBlock());
2470 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2471 } else
2472 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2473 return true;
2474 } else {
2475 AArch64CC::CondCode CC = AArch64CC::NE;
2476 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2477 // Fake request the condition, otherwise the intrinsic might be completely
2478 // optimized away.
2479 unsigned CondReg = getRegForValue(BI->getCondition());
2480 if (!CondReg)
2481 return false;
2482
2483 // Emit the branch.
2484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2485 .addImm(CC)
2486 .addMBB(TBB);
2487
2488 finishCondBranch(BI->getParent(), TBB, FBB);
2489 return true;
2490 }
2491 }
2492
2493 unsigned CondReg = getRegForValue(BI->getCondition());
2494 if (CondReg == 0)
2495 return false;
2496 bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2497
2498 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2499 unsigned Opcode = AArch64::TBNZW;
2500 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2501 std::swap(TBB, FBB);
2502 Opcode = AArch64::TBZW;
2503 }
2504
2505 const MCInstrDesc &II = TII.get(Opcode);
2506 unsigned ConstrainedCondReg
2507 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2508 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2509 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2510 .addImm(0)
2511 .addMBB(TBB);
2512
2513 finishCondBranch(BI->getParent(), TBB, FBB);
2514 return true;
2515 }
2516
selectIndirectBr(const Instruction * I)2517 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2518 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2519 unsigned AddrReg = getRegForValue(BI->getOperand(0));
2520 if (AddrReg == 0)
2521 return false;
2522
2523 // Emit the indirect branch.
2524 const MCInstrDesc &II = TII.get(AArch64::BR);
2525 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2527
2528 // Make sure the CFG is up-to-date.
2529 for (auto *Succ : BI->successors())
2530 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2531
2532 return true;
2533 }
2534
selectCmp(const Instruction * I)2535 bool AArch64FastISel::selectCmp(const Instruction *I) {
2536 const CmpInst *CI = cast<CmpInst>(I);
2537
2538 // Vectors of i1 are weird: bail out.
2539 if (CI->getType()->isVectorTy())
2540 return false;
2541
2542 // Try to optimize or fold the cmp.
2543 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544 unsigned ResultReg = 0;
2545 switch (Predicate) {
2546 default:
2547 break;
2548 case CmpInst::FCMP_FALSE:
2549 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2551 TII.get(TargetOpcode::COPY), ResultReg)
2552 .addReg(AArch64::WZR, getKillRegState(true));
2553 break;
2554 case CmpInst::FCMP_TRUE:
2555 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556 break;
2557 }
2558
2559 if (ResultReg) {
2560 updateValueMap(I, ResultReg);
2561 return true;
2562 }
2563
2564 // Emit the cmp.
2565 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566 return false;
2567
2568 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569
2570 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571 // condition codes are inverted, because they are used by CSINC.
2572 static unsigned CondCodeTable[2][2] = {
2573 { AArch64CC::NE, AArch64CC::VC },
2574 { AArch64CC::PL, AArch64CC::LE }
2575 };
2576 unsigned *CondCodes = nullptr;
2577 switch (Predicate) {
2578 default:
2579 break;
2580 case CmpInst::FCMP_UEQ:
2581 CondCodes = &CondCodeTable[0][0];
2582 break;
2583 case CmpInst::FCMP_ONE:
2584 CondCodes = &CondCodeTable[1][0];
2585 break;
2586 }
2587
2588 if (CondCodes) {
2589 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2591 TmpReg1)
2592 .addReg(AArch64::WZR, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[0]);
2595 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2596 ResultReg)
2597 .addReg(TmpReg1, getKillRegState(true))
2598 .addReg(AArch64::WZR, getKillRegState(true))
2599 .addImm(CondCodes[1]);
2600
2601 updateValueMap(I, ResultReg);
2602 return true;
2603 }
2604
2605 // Now set a register based on the comparison.
2606 AArch64CC::CondCode CC = getCompareCC(Predicate);
2607 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2609 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2610 ResultReg)
2611 .addReg(AArch64::WZR, getKillRegState(true))
2612 .addReg(AArch64::WZR, getKillRegState(true))
2613 .addImm(invertedCC);
2614
2615 updateValueMap(I, ResultReg);
2616 return true;
2617 }
2618
2619 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620 /// value.
optimizeSelect(const SelectInst * SI)2621 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622 if (!SI->getType()->isIntegerTy(1))
2623 return false;
2624
2625 const Value *Src1Val, *Src2Val;
2626 unsigned Opc = 0;
2627 bool NeedExtraOp = false;
2628 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629 if (CI->isOne()) {
2630 Src1Val = SI->getCondition();
2631 Src2Val = SI->getFalseValue();
2632 Opc = AArch64::ORRWrr;
2633 } else {
2634 assert(CI->isZero());
2635 Src1Val = SI->getFalseValue();
2636 Src2Val = SI->getCondition();
2637 Opc = AArch64::BICWrr;
2638 }
2639 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640 if (CI->isOne()) {
2641 Src1Val = SI->getCondition();
2642 Src2Val = SI->getTrueValue();
2643 Opc = AArch64::ORRWrr;
2644 NeedExtraOp = true;
2645 } else {
2646 assert(CI->isZero());
2647 Src1Val = SI->getCondition();
2648 Src2Val = SI->getTrueValue();
2649 Opc = AArch64::ANDWrr;
2650 }
2651 }
2652
2653 if (!Opc)
2654 return false;
2655
2656 unsigned Src1Reg = getRegForValue(Src1Val);
2657 if (!Src1Reg)
2658 return false;
2659 bool Src1IsKill = hasTrivialKill(Src1Val);
2660
2661 unsigned Src2Reg = getRegForValue(Src2Val);
2662 if (!Src2Reg)
2663 return false;
2664 bool Src2IsKill = hasTrivialKill(Src2Val);
2665
2666 if (NeedExtraOp) {
2667 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2668 Src1IsKill = true;
2669 }
2670 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2671 Src1IsKill, Src2Reg, Src2IsKill);
2672 updateValueMap(SI, ResultReg);
2673 return true;
2674 }
2675
selectSelect(const Instruction * I)2676 bool AArch64FastISel::selectSelect(const Instruction *I) {
2677 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2678 MVT VT;
2679 if (!isTypeSupported(I->getType(), VT))
2680 return false;
2681
2682 unsigned Opc;
2683 const TargetRegisterClass *RC;
2684 switch (VT.SimpleTy) {
2685 default:
2686 return false;
2687 case MVT::i1:
2688 case MVT::i8:
2689 case MVT::i16:
2690 case MVT::i32:
2691 Opc = AArch64::CSELWr;
2692 RC = &AArch64::GPR32RegClass;
2693 break;
2694 case MVT::i64:
2695 Opc = AArch64::CSELXr;
2696 RC = &AArch64::GPR64RegClass;
2697 break;
2698 case MVT::f32:
2699 Opc = AArch64::FCSELSrrr;
2700 RC = &AArch64::FPR32RegClass;
2701 break;
2702 case MVT::f64:
2703 Opc = AArch64::FCSELDrrr;
2704 RC = &AArch64::FPR64RegClass;
2705 break;
2706 }
2707
2708 const SelectInst *SI = cast<SelectInst>(I);
2709 const Value *Cond = SI->getCondition();
2710 AArch64CC::CondCode CC = AArch64CC::NE;
2711 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2712
2713 if (optimizeSelect(SI))
2714 return true;
2715
2716 // Try to pickup the flags, so we don't have to emit another compare.
2717 if (foldXALUIntrinsic(CC, I, Cond)) {
2718 // Fake request the condition to force emission of the XALU intrinsic.
2719 unsigned CondReg = getRegForValue(Cond);
2720 if (!CondReg)
2721 return false;
2722 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2723 isValueAvailable(Cond)) {
2724 const auto *Cmp = cast<CmpInst>(Cond);
2725 // Try to optimize or fold the cmp.
2726 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2727 const Value *FoldSelect = nullptr;
2728 switch (Predicate) {
2729 default:
2730 break;
2731 case CmpInst::FCMP_FALSE:
2732 FoldSelect = SI->getFalseValue();
2733 break;
2734 case CmpInst::FCMP_TRUE:
2735 FoldSelect = SI->getTrueValue();
2736 break;
2737 }
2738
2739 if (FoldSelect) {
2740 unsigned SrcReg = getRegForValue(FoldSelect);
2741 if (!SrcReg)
2742 return false;
2743 unsigned UseReg = lookUpRegForValue(SI);
2744 if (UseReg)
2745 MRI.clearKillFlags(UseReg);
2746
2747 updateValueMap(I, SrcReg);
2748 return true;
2749 }
2750
2751 // Emit the cmp.
2752 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2753 return false;
2754
2755 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2756 CC = getCompareCC(Predicate);
2757 switch (Predicate) {
2758 default:
2759 break;
2760 case CmpInst::FCMP_UEQ:
2761 ExtraCC = AArch64CC::EQ;
2762 CC = AArch64CC::VS;
2763 break;
2764 case CmpInst::FCMP_ONE:
2765 ExtraCC = AArch64CC::MI;
2766 CC = AArch64CC::GT;
2767 break;
2768 }
2769 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2770 } else {
2771 unsigned CondReg = getRegForValue(Cond);
2772 if (!CondReg)
2773 return false;
2774 bool CondIsKill = hasTrivialKill(Cond);
2775
2776 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2777 CondReg = constrainOperandRegClass(II, CondReg, 1);
2778
2779 // Emit a TST instruction (ANDS wzr, reg, #imm).
2780 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2781 AArch64::WZR)
2782 .addReg(CondReg, getKillRegState(CondIsKill))
2783 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2784 }
2785
2786 unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2787 bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2788
2789 unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2790 bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2791
2792 if (!Src1Reg || !Src2Reg)
2793 return false;
2794
2795 if (ExtraCC != AArch64CC::AL) {
2796 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797 Src2IsKill, ExtraCC);
2798 Src2IsKill = true;
2799 }
2800 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2801 Src2IsKill, CC);
2802 updateValueMap(I, ResultReg);
2803 return true;
2804 }
2805
selectFPExt(const Instruction * I)2806 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2807 Value *V = I->getOperand(0);
2808 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2809 return false;
2810
2811 unsigned Op = getRegForValue(V);
2812 if (Op == 0)
2813 return false;
2814
2815 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2816 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2817 ResultReg).addReg(Op);
2818 updateValueMap(I, ResultReg);
2819 return true;
2820 }
2821
selectFPTrunc(const Instruction * I)2822 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2823 Value *V = I->getOperand(0);
2824 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2825 return false;
2826
2827 unsigned Op = getRegForValue(V);
2828 if (Op == 0)
2829 return false;
2830
2831 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2832 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2833 ResultReg).addReg(Op);
2834 updateValueMap(I, ResultReg);
2835 return true;
2836 }
2837
2838 // FPToUI and FPToSI
selectFPToInt(const Instruction * I,bool Signed)2839 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2840 MVT DestVT;
2841 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2842 return false;
2843
2844 unsigned SrcReg = getRegForValue(I->getOperand(0));
2845 if (SrcReg == 0)
2846 return false;
2847
2848 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2849 if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2850 return false;
2851
2852 unsigned Opc;
2853 if (SrcVT == MVT::f64) {
2854 if (Signed)
2855 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2856 else
2857 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2858 } else {
2859 if (Signed)
2860 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2861 else
2862 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2863 }
2864 unsigned ResultReg = createResultReg(
2865 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2866 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2867 .addReg(SrcReg);
2868 updateValueMap(I, ResultReg);
2869 return true;
2870 }
2871
selectIntToFP(const Instruction * I,bool Signed)2872 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2873 MVT DestVT;
2874 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2875 return false;
2876 // Let regular ISEL handle FP16
2877 if (DestVT == MVT::f16)
2878 return false;
2879
2880 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2881 "Unexpected value type.");
2882
2883 unsigned SrcReg = getRegForValue(I->getOperand(0));
2884 if (!SrcReg)
2885 return false;
2886 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2887
2888 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2889
2890 // Handle sign-extension.
2891 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2892 SrcReg =
2893 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2894 if (!SrcReg)
2895 return false;
2896 SrcIsKill = true;
2897 }
2898
2899 unsigned Opc;
2900 if (SrcVT == MVT::i64) {
2901 if (Signed)
2902 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2903 else
2904 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2905 } else {
2906 if (Signed)
2907 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2908 else
2909 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2910 }
2911
2912 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2913 SrcIsKill);
2914 updateValueMap(I, ResultReg);
2915 return true;
2916 }
2917
fastLowerArguments()2918 bool AArch64FastISel::fastLowerArguments() {
2919 if (!FuncInfo.CanLowerReturn)
2920 return false;
2921
2922 const Function *F = FuncInfo.Fn;
2923 if (F->isVarArg())
2924 return false;
2925
2926 CallingConv::ID CC = F->getCallingConv();
2927 if (CC != CallingConv::C && CC != CallingConv::Swift)
2928 return false;
2929
2930 if (Subtarget->hasCustomCallingConv())
2931 return false;
2932
2933 // Only handle simple cases of up to 8 GPR and FPR each.
2934 unsigned GPRCnt = 0;
2935 unsigned FPRCnt = 0;
2936 for (auto const &Arg : F->args()) {
2937 if (Arg.hasAttribute(Attribute::ByVal) ||
2938 Arg.hasAttribute(Attribute::InReg) ||
2939 Arg.hasAttribute(Attribute::StructRet) ||
2940 Arg.hasAttribute(Attribute::SwiftSelf) ||
2941 Arg.hasAttribute(Attribute::SwiftError) ||
2942 Arg.hasAttribute(Attribute::Nest))
2943 return false;
2944
2945 Type *ArgTy = Arg.getType();
2946 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2947 return false;
2948
2949 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2950 if (!ArgVT.isSimple())
2951 return false;
2952
2953 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2954 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2955 return false;
2956
2957 if (VT.isVector() &&
2958 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2959 return false;
2960
2961 if (VT >= MVT::i1 && VT <= MVT::i64)
2962 ++GPRCnt;
2963 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2964 VT.is128BitVector())
2965 ++FPRCnt;
2966 else
2967 return false;
2968
2969 if (GPRCnt > 8 || FPRCnt > 8)
2970 return false;
2971 }
2972
2973 static const MCPhysReg Registers[6][8] = {
2974 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2975 AArch64::W5, AArch64::W6, AArch64::W7 },
2976 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2977 AArch64::X5, AArch64::X6, AArch64::X7 },
2978 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2979 AArch64::H5, AArch64::H6, AArch64::H7 },
2980 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2981 AArch64::S5, AArch64::S6, AArch64::S7 },
2982 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2983 AArch64::D5, AArch64::D6, AArch64::D7 },
2984 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2985 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2986 };
2987
2988 unsigned GPRIdx = 0;
2989 unsigned FPRIdx = 0;
2990 for (auto const &Arg : F->args()) {
2991 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2992 unsigned SrcReg;
2993 const TargetRegisterClass *RC;
2994 if (VT >= MVT::i1 && VT <= MVT::i32) {
2995 SrcReg = Registers[0][GPRIdx++];
2996 RC = &AArch64::GPR32RegClass;
2997 VT = MVT::i32;
2998 } else if (VT == MVT::i64) {
2999 SrcReg = Registers[1][GPRIdx++];
3000 RC = &AArch64::GPR64RegClass;
3001 } else if (VT == MVT::f16) {
3002 SrcReg = Registers[2][FPRIdx++];
3003 RC = &AArch64::FPR16RegClass;
3004 } else if (VT == MVT::f32) {
3005 SrcReg = Registers[3][FPRIdx++];
3006 RC = &AArch64::FPR32RegClass;
3007 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3008 SrcReg = Registers[4][FPRIdx++];
3009 RC = &AArch64::FPR64RegClass;
3010 } else if (VT.is128BitVector()) {
3011 SrcReg = Registers[5][FPRIdx++];
3012 RC = &AArch64::FPR128RegClass;
3013 } else
3014 llvm_unreachable("Unexpected value type.");
3015
3016 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3017 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3018 // Without this, EmitLiveInCopies may eliminate the livein if its only
3019 // use is a bitcast (which isn't turned into an instruction).
3020 unsigned ResultReg = createResultReg(RC);
3021 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3022 TII.get(TargetOpcode::COPY), ResultReg)
3023 .addReg(DstReg, getKillRegState(true));
3024 updateValueMap(&Arg, ResultReg);
3025 }
3026 return true;
3027 }
3028
processCallArgs(CallLoweringInfo & CLI,SmallVectorImpl<MVT> & OutVTs,unsigned & NumBytes)3029 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3030 SmallVectorImpl<MVT> &OutVTs,
3031 unsigned &NumBytes) {
3032 CallingConv::ID CC = CLI.CallConv;
3033 SmallVector<CCValAssign, 16> ArgLocs;
3034 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3035 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3036
3037 // Get a count of how many bytes are to be pushed on the stack.
3038 NumBytes = CCInfo.getNextStackOffset();
3039
3040 // Issue CALLSEQ_START
3041 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3042 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3043 .addImm(NumBytes).addImm(0);
3044
3045 // Process the args.
3046 for (CCValAssign &VA : ArgLocs) {
3047 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3048 MVT ArgVT = OutVTs[VA.getValNo()];
3049
3050 unsigned ArgReg = getRegForValue(ArgVal);
3051 if (!ArgReg)
3052 return false;
3053
3054 // Handle arg promotion: SExt, ZExt, AExt.
3055 switch (VA.getLocInfo()) {
3056 case CCValAssign::Full:
3057 break;
3058 case CCValAssign::SExt: {
3059 MVT DestVT = VA.getLocVT();
3060 MVT SrcVT = ArgVT;
3061 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3062 if (!ArgReg)
3063 return false;
3064 break;
3065 }
3066 case CCValAssign::AExt:
3067 // Intentional fall-through.
3068 case CCValAssign::ZExt: {
3069 MVT DestVT = VA.getLocVT();
3070 MVT SrcVT = ArgVT;
3071 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3072 if (!ArgReg)
3073 return false;
3074 break;
3075 }
3076 default:
3077 llvm_unreachable("Unknown arg promotion!");
3078 }
3079
3080 // Now copy/store arg to correct locations.
3081 if (VA.isRegLoc() && !VA.needsCustom()) {
3082 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3083 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3084 CLI.OutRegs.push_back(VA.getLocReg());
3085 } else if (VA.needsCustom()) {
3086 // FIXME: Handle custom args.
3087 return false;
3088 } else {
3089 assert(VA.isMemLoc() && "Assuming store on stack.");
3090
3091 // Don't emit stores for undef values.
3092 if (isa<UndefValue>(ArgVal))
3093 continue;
3094
3095 // Need to store on the stack.
3096 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3097
3098 unsigned BEAlign = 0;
3099 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3100 BEAlign = 8 - ArgSize;
3101
3102 Address Addr;
3103 Addr.setKind(Address::RegBase);
3104 Addr.setReg(AArch64::SP);
3105 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3106
3107 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3108 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3109 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3110 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3111
3112 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3113 return false;
3114 }
3115 }
3116 return true;
3117 }
3118
finishCall(CallLoweringInfo & CLI,MVT RetVT,unsigned NumBytes)3119 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3120 unsigned NumBytes) {
3121 CallingConv::ID CC = CLI.CallConv;
3122
3123 // Issue CALLSEQ_END
3124 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3125 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3126 .addImm(NumBytes).addImm(0);
3127
3128 // Now the return value.
3129 if (RetVT != MVT::isVoid) {
3130 SmallVector<CCValAssign, 16> RVLocs;
3131 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3132 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3133
3134 // Only handle a single return value.
3135 if (RVLocs.size() != 1)
3136 return false;
3137
3138 // Copy all of the result registers out of their specified physreg.
3139 MVT CopyVT = RVLocs[0].getValVT();
3140
3141 // TODO: Handle big-endian results
3142 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3143 return false;
3144
3145 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3146 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3147 TII.get(TargetOpcode::COPY), ResultReg)
3148 .addReg(RVLocs[0].getLocReg());
3149 CLI.InRegs.push_back(RVLocs[0].getLocReg());
3150
3151 CLI.ResultReg = ResultReg;
3152 CLI.NumResultRegs = 1;
3153 }
3154
3155 return true;
3156 }
3157
fastLowerCall(CallLoweringInfo & CLI)3158 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3159 CallingConv::ID CC = CLI.CallConv;
3160 bool IsTailCall = CLI.IsTailCall;
3161 bool IsVarArg = CLI.IsVarArg;
3162 const Value *Callee = CLI.Callee;
3163 MCSymbol *Symbol = CLI.Symbol;
3164
3165 if (!Callee && !Symbol)
3166 return false;
3167
3168 // Allow SelectionDAG isel to handle tail calls.
3169 if (IsTailCall)
3170 return false;
3171
3172 CodeModel::Model CM = TM.getCodeModel();
3173 // Only support the small-addressing and large code models.
3174 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3175 return false;
3176
3177 // FIXME: Add large code model support for ELF.
3178 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3179 return false;
3180
3181 // Let SDISel handle vararg functions.
3182 if (IsVarArg)
3183 return false;
3184
3185 // FIXME: Only handle *simple* calls for now.
3186 MVT RetVT;
3187 if (CLI.RetTy->isVoidTy())
3188 RetVT = MVT::isVoid;
3189 else if (!isTypeLegal(CLI.RetTy, RetVT))
3190 return false;
3191
3192 for (auto Flag : CLI.OutFlags)
3193 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194 Flag.isSwiftSelf() || Flag.isSwiftError())
3195 return false;
3196
3197 // Set up the argument vectors.
3198 SmallVector<MVT, 16> OutVTs;
3199 OutVTs.reserve(CLI.OutVals.size());
3200
3201 for (auto *Val : CLI.OutVals) {
3202 MVT VT;
3203 if (!isTypeLegal(Val->getType(), VT) &&
3204 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205 return false;
3206
3207 // We don't handle vector parameters yet.
3208 if (VT.isVector() || VT.getSizeInBits() > 64)
3209 return false;
3210
3211 OutVTs.push_back(VT);
3212 }
3213
3214 Address Addr;
3215 if (Callee && !computeCallAddress(Callee, Addr))
3216 return false;
3217
3218 // Handle the arguments now that we've gotten them.
3219 unsigned NumBytes;
3220 if (!processCallArgs(CLI, OutVTs, NumBytes))
3221 return false;
3222
3223 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3224 if (RegInfo->isAnyArgRegReserved(*MF))
3225 RegInfo->emitReservedArgRegCallError(*MF);
3226
3227 // Issue the call.
3228 MachineInstrBuilder MIB;
3229 if (Subtarget->useSmallAddressing()) {
3230 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3231 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3232 if (Symbol)
3233 MIB.addSym(Symbol, 0);
3234 else if (Addr.getGlobalValue())
3235 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3236 else if (Addr.getReg()) {
3237 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3238 MIB.addReg(Reg);
3239 } else
3240 return false;
3241 } else {
3242 unsigned CallReg = 0;
3243 if (Symbol) {
3244 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3245 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3246 ADRPReg)
3247 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3248
3249 CallReg = createResultReg(&AArch64::GPR64RegClass);
3250 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3251 TII.get(AArch64::LDRXui), CallReg)
3252 .addReg(ADRPReg)
3253 .addSym(Symbol,
3254 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3255 } else if (Addr.getGlobalValue())
3256 CallReg = materializeGV(Addr.getGlobalValue());
3257 else if (Addr.getReg())
3258 CallReg = Addr.getReg();
3259
3260 if (!CallReg)
3261 return false;
3262
3263 const MCInstrDesc &II = TII.get(AArch64::BLR);
3264 CallReg = constrainOperandRegClass(II, CallReg, 0);
3265 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3266 }
3267
3268 // Add implicit physical register uses to the call.
3269 for (auto Reg : CLI.OutRegs)
3270 MIB.addReg(Reg, RegState::Implicit);
3271
3272 // Add a register mask with the call-preserved registers.
3273 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3274 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3275
3276 CLI.Call = MIB;
3277
3278 // Finish off the call including any return values.
3279 return finishCall(CLI, RetVT, NumBytes);
3280 }
3281
isMemCpySmall(uint64_t Len,unsigned Alignment)3282 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3283 if (Alignment)
3284 return Len / Alignment <= 4;
3285 else
3286 return Len < 32;
3287 }
3288
tryEmitSmallMemCpy(Address Dest,Address Src,uint64_t Len,unsigned Alignment)3289 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3290 uint64_t Len, unsigned Alignment) {
3291 // Make sure we don't bloat code by inlining very large memcpy's.
3292 if (!isMemCpySmall(Len, Alignment))
3293 return false;
3294
3295 int64_t UnscaledOffset = 0;
3296 Address OrigDest = Dest;
3297 Address OrigSrc = Src;
3298
3299 while (Len) {
3300 MVT VT;
3301 if (!Alignment || Alignment >= 8) {
3302 if (Len >= 8)
3303 VT = MVT::i64;
3304 else if (Len >= 4)
3305 VT = MVT::i32;
3306 else if (Len >= 2)
3307 VT = MVT::i16;
3308 else {
3309 VT = MVT::i8;
3310 }
3311 } else {
3312 // Bound based on alignment.
3313 if (Len >= 4 && Alignment == 4)
3314 VT = MVT::i32;
3315 else if (Len >= 2 && Alignment == 2)
3316 VT = MVT::i16;
3317 else {
3318 VT = MVT::i8;
3319 }
3320 }
3321
3322 unsigned ResultReg = emitLoad(VT, VT, Src);
3323 if (!ResultReg)
3324 return false;
3325
3326 if (!emitStore(VT, ResultReg, Dest))
3327 return false;
3328
3329 int64_t Size = VT.getSizeInBits() / 8;
3330 Len -= Size;
3331 UnscaledOffset += Size;
3332
3333 // We need to recompute the unscaled offset for each iteration.
3334 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3335 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3336 }
3337
3338 return true;
3339 }
3340
3341 /// Check if it is possible to fold the condition from the XALU intrinsic
3342 /// into the user. The condition code will only be updated on success.
foldXALUIntrinsic(AArch64CC::CondCode & CC,const Instruction * I,const Value * Cond)3343 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3344 const Instruction *I,
3345 const Value *Cond) {
3346 if (!isa<ExtractValueInst>(Cond))
3347 return false;
3348
3349 const auto *EV = cast<ExtractValueInst>(Cond);
3350 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3351 return false;
3352
3353 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3354 MVT RetVT;
3355 const Function *Callee = II->getCalledFunction();
3356 Type *RetTy =
3357 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3358 if (!isTypeLegal(RetTy, RetVT))
3359 return false;
3360
3361 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3362 return false;
3363
3364 const Value *LHS = II->getArgOperand(0);
3365 const Value *RHS = II->getArgOperand(1);
3366
3367 // Canonicalize immediate to the RHS.
3368 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3369 isCommutativeIntrinsic(II))
3370 std::swap(LHS, RHS);
3371
3372 // Simplify multiplies.
3373 Intrinsic::ID IID = II->getIntrinsicID();
3374 switch (IID) {
3375 default:
3376 break;
3377 case Intrinsic::smul_with_overflow:
3378 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3379 if (C->getValue() == 2)
3380 IID = Intrinsic::sadd_with_overflow;
3381 break;
3382 case Intrinsic::umul_with_overflow:
3383 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3384 if (C->getValue() == 2)
3385 IID = Intrinsic::uadd_with_overflow;
3386 break;
3387 }
3388
3389 AArch64CC::CondCode TmpCC;
3390 switch (IID) {
3391 default:
3392 return false;
3393 case Intrinsic::sadd_with_overflow:
3394 case Intrinsic::ssub_with_overflow:
3395 TmpCC = AArch64CC::VS;
3396 break;
3397 case Intrinsic::uadd_with_overflow:
3398 TmpCC = AArch64CC::HS;
3399 break;
3400 case Intrinsic::usub_with_overflow:
3401 TmpCC = AArch64CC::LO;
3402 break;
3403 case Intrinsic::smul_with_overflow:
3404 case Intrinsic::umul_with_overflow:
3405 TmpCC = AArch64CC::NE;
3406 break;
3407 }
3408
3409 // Check if both instructions are in the same basic block.
3410 if (!isValueAvailable(II))
3411 return false;
3412
3413 // Make sure nothing is in the way
3414 BasicBlock::const_iterator Start(I);
3415 BasicBlock::const_iterator End(II);
3416 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3417 // We only expect extractvalue instructions between the intrinsic and the
3418 // instruction to be selected.
3419 if (!isa<ExtractValueInst>(Itr))
3420 return false;
3421
3422 // Check that the extractvalue operand comes from the intrinsic.
3423 const auto *EVI = cast<ExtractValueInst>(Itr);
3424 if (EVI->getAggregateOperand() != II)
3425 return false;
3426 }
3427
3428 CC = TmpCC;
3429 return true;
3430 }
3431
fastLowerIntrinsicCall(const IntrinsicInst * II)3432 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3433 // FIXME: Handle more intrinsics.
3434 switch (II->getIntrinsicID()) {
3435 default: return false;
3436 case Intrinsic::frameaddress: {
3437 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3438 MFI.setFrameAddressIsTaken(true);
3439
3440 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3441 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3442 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3444 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3445 // Recursively load frame address
3446 // ldr x0, [fp]
3447 // ldr x0, [x0]
3448 // ldr x0, [x0]
3449 // ...
3450 unsigned DestReg;
3451 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3452 while (Depth--) {
3453 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3454 SrcReg, /*IsKill=*/true, 0);
3455 assert(DestReg && "Unexpected LDR instruction emission failure.");
3456 SrcReg = DestReg;
3457 }
3458
3459 updateValueMap(II, SrcReg);
3460 return true;
3461 }
3462 case Intrinsic::sponentry: {
3463 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3464
3465 // SP = FP + Fixed Object + 16
3466 int FI = MFI.CreateFixedObject(4, 0, false);
3467 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3468 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3469 TII.get(AArch64::ADDXri), ResultReg)
3470 .addFrameIndex(FI)
3471 .addImm(0)
3472 .addImm(0);
3473
3474 updateValueMap(II, ResultReg);
3475 return true;
3476 }
3477 case Intrinsic::memcpy:
3478 case Intrinsic::memmove: {
3479 const auto *MTI = cast<MemTransferInst>(II);
3480 // Don't handle volatile.
3481 if (MTI->isVolatile())
3482 return false;
3483
3484 // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
3485 // we would emit dead code because we don't currently handle memmoves.
3486 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3487 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3488 // Small memcpy's are common enough that we want to do them without a call
3489 // if possible.
3490 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3491 unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3492 MTI->getSourceAlignment());
3493 if (isMemCpySmall(Len, Alignment)) {
3494 Address Dest, Src;
3495 if (!computeAddress(MTI->getRawDest(), Dest) ||
3496 !computeAddress(MTI->getRawSource(), Src))
3497 return false;
3498 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3499 return true;
3500 }
3501 }
3502
3503 if (!MTI->getLength()->getType()->isIntegerTy(64))
3504 return false;
3505
3506 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3507 // Fast instruction selection doesn't support the special
3508 // address spaces.
3509 return false;
3510
3511 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3512 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3513 }
3514 case Intrinsic::memset: {
3515 const MemSetInst *MSI = cast<MemSetInst>(II);
3516 // Don't handle volatile.
3517 if (MSI->isVolatile())
3518 return false;
3519
3520 if (!MSI->getLength()->getType()->isIntegerTy(64))
3521 return false;
3522
3523 if (MSI->getDestAddressSpace() > 255)
3524 // Fast instruction selection doesn't support the special
3525 // address spaces.
3526 return false;
3527
3528 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3529 }
3530 case Intrinsic::sin:
3531 case Intrinsic::cos:
3532 case Intrinsic::pow: {
3533 MVT RetVT;
3534 if (!isTypeLegal(II->getType(), RetVT))
3535 return false;
3536
3537 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3538 return false;
3539
3540 static const RTLIB::Libcall LibCallTable[3][2] = {
3541 { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3542 { RTLIB::COS_F32, RTLIB::COS_F64 },
3543 { RTLIB::POW_F32, RTLIB::POW_F64 }
3544 };
3545 RTLIB::Libcall LC;
3546 bool Is64Bit = RetVT == MVT::f64;
3547 switch (II->getIntrinsicID()) {
3548 default:
3549 llvm_unreachable("Unexpected intrinsic.");
3550 case Intrinsic::sin:
3551 LC = LibCallTable[0][Is64Bit];
3552 break;
3553 case Intrinsic::cos:
3554 LC = LibCallTable[1][Is64Bit];
3555 break;
3556 case Intrinsic::pow:
3557 LC = LibCallTable[2][Is64Bit];
3558 break;
3559 }
3560
3561 ArgListTy Args;
3562 Args.reserve(II->getNumArgOperands());
3563
3564 // Populate the argument list.
3565 for (auto &Arg : II->arg_operands()) {
3566 ArgListEntry Entry;
3567 Entry.Val = Arg;
3568 Entry.Ty = Arg->getType();
3569 Args.push_back(Entry);
3570 }
3571
3572 CallLoweringInfo CLI;
3573 MCContext &Ctx = MF->getContext();
3574 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3575 TLI.getLibcallName(LC), std::move(Args));
3576 if (!lowerCallTo(CLI))
3577 return false;
3578 updateValueMap(II, CLI.ResultReg);
3579 return true;
3580 }
3581 case Intrinsic::fabs: {
3582 MVT VT;
3583 if (!isTypeLegal(II->getType(), VT))
3584 return false;
3585
3586 unsigned Opc;
3587 switch (VT.SimpleTy) {
3588 default:
3589 return false;
3590 case MVT::f32:
3591 Opc = AArch64::FABSSr;
3592 break;
3593 case MVT::f64:
3594 Opc = AArch64::FABSDr;
3595 break;
3596 }
3597 unsigned SrcReg = getRegForValue(II->getOperand(0));
3598 if (!SrcReg)
3599 return false;
3600 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3601 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3602 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3603 .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3604 updateValueMap(II, ResultReg);
3605 return true;
3606 }
3607 case Intrinsic::trap:
3608 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3609 .addImm(1);
3610 return true;
3611
3612 case Intrinsic::sqrt: {
3613 Type *RetTy = II->getCalledFunction()->getReturnType();
3614
3615 MVT VT;
3616 if (!isTypeLegal(RetTy, VT))
3617 return false;
3618
3619 unsigned Op0Reg = getRegForValue(II->getOperand(0));
3620 if (!Op0Reg)
3621 return false;
3622 bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3623
3624 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3625 if (!ResultReg)
3626 return false;
3627
3628 updateValueMap(II, ResultReg);
3629 return true;
3630 }
3631 case Intrinsic::sadd_with_overflow:
3632 case Intrinsic::uadd_with_overflow:
3633 case Intrinsic::ssub_with_overflow:
3634 case Intrinsic::usub_with_overflow:
3635 case Intrinsic::smul_with_overflow:
3636 case Intrinsic::umul_with_overflow: {
3637 // This implements the basic lowering of the xalu with overflow intrinsics.
3638 const Function *Callee = II->getCalledFunction();
3639 auto *Ty = cast<StructType>(Callee->getReturnType());
3640 Type *RetTy = Ty->getTypeAtIndex(0U);
3641
3642 MVT VT;
3643 if (!isTypeLegal(RetTy, VT))
3644 return false;
3645
3646 if (VT != MVT::i32 && VT != MVT::i64)
3647 return false;
3648
3649 const Value *LHS = II->getArgOperand(0);
3650 const Value *RHS = II->getArgOperand(1);
3651 // Canonicalize immediate to the RHS.
3652 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3653 isCommutativeIntrinsic(II))
3654 std::swap(LHS, RHS);
3655
3656 // Simplify multiplies.
3657 Intrinsic::ID IID = II->getIntrinsicID();
3658 switch (IID) {
3659 default:
3660 break;
3661 case Intrinsic::smul_with_overflow:
3662 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3663 if (C->getValue() == 2) {
3664 IID = Intrinsic::sadd_with_overflow;
3665 RHS = LHS;
3666 }
3667 break;
3668 case Intrinsic::umul_with_overflow:
3669 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3670 if (C->getValue() == 2) {
3671 IID = Intrinsic::uadd_with_overflow;
3672 RHS = LHS;
3673 }
3674 break;
3675 }
3676
3677 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3678 AArch64CC::CondCode CC = AArch64CC::Invalid;
3679 switch (IID) {
3680 default: llvm_unreachable("Unexpected intrinsic!");
3681 case Intrinsic::sadd_with_overflow:
3682 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3683 CC = AArch64CC::VS;
3684 break;
3685 case Intrinsic::uadd_with_overflow:
3686 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687 CC = AArch64CC::HS;
3688 break;
3689 case Intrinsic::ssub_with_overflow:
3690 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3691 CC = AArch64CC::VS;
3692 break;
3693 case Intrinsic::usub_with_overflow:
3694 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695 CC = AArch64CC::LO;
3696 break;
3697 case Intrinsic::smul_with_overflow: {
3698 CC = AArch64CC::NE;
3699 unsigned LHSReg = getRegForValue(LHS);
3700 if (!LHSReg)
3701 return false;
3702 bool LHSIsKill = hasTrivialKill(LHS);
3703
3704 unsigned RHSReg = getRegForValue(RHS);
3705 if (!RHSReg)
3706 return false;
3707 bool RHSIsKill = hasTrivialKill(RHS);
3708
3709 if (VT == MVT::i32) {
3710 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3711 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3712 /*IsKill=*/false, 32);
3713 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3714 AArch64::sub_32);
3715 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3716 AArch64::sub_32);
3717 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3718 AArch64_AM::ASR, 31, /*WantResult=*/false);
3719 } else {
3720 assert(VT == MVT::i64 && "Unexpected value type.");
3721 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3722 // reused in the next instruction.
3723 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3724 /*IsKill=*/false);
3725 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3726 RHSReg, RHSIsKill);
3727 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3728 AArch64_AM::ASR, 63, /*WantResult=*/false);
3729 }
3730 break;
3731 }
3732 case Intrinsic::umul_with_overflow: {
3733 CC = AArch64CC::NE;
3734 unsigned LHSReg = getRegForValue(LHS);
3735 if (!LHSReg)
3736 return false;
3737 bool LHSIsKill = hasTrivialKill(LHS);
3738
3739 unsigned RHSReg = getRegForValue(RHS);
3740 if (!RHSReg)
3741 return false;
3742 bool RHSIsKill = hasTrivialKill(RHS);
3743
3744 if (VT == MVT::i32) {
3745 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3746 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3747 /*IsKill=*/false, AArch64_AM::LSR, 32,
3748 /*WantResult=*/false);
3749 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3750 AArch64::sub_32);
3751 } else {
3752 assert(VT == MVT::i64 && "Unexpected value type.");
3753 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3754 // reused in the next instruction.
3755 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3756 /*IsKill=*/false);
3757 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3758 RHSReg, RHSIsKill);
3759 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3760 /*IsKill=*/false, /*WantResult=*/false);
3761 }
3762 break;
3763 }
3764 }
3765
3766 if (MulReg) {
3767 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3769 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3770 }
3771
3772 if (!ResultReg1)
3773 return false;
3774
3775 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3776 AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3777 /*IsKill=*/true, getInvertedCondCode(CC));
3778 (void)ResultReg2;
3779 assert((ResultReg1 + 1) == ResultReg2 &&
3780 "Nonconsecutive result registers.");
3781 updateValueMap(II, ResultReg1, 2);
3782 return true;
3783 }
3784 }
3785 return false;
3786 }
3787
selectRet(const Instruction * I)3788 bool AArch64FastISel::selectRet(const Instruction *I) {
3789 const ReturnInst *Ret = cast<ReturnInst>(I);
3790 const Function &F = *I->getParent()->getParent();
3791
3792 if (!FuncInfo.CanLowerReturn)
3793 return false;
3794
3795 if (F.isVarArg())
3796 return false;
3797
3798 if (TLI.supportSwiftError() &&
3799 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3800 return false;
3801
3802 if (TLI.supportSplitCSR(FuncInfo.MF))
3803 return false;
3804
3805 // Build a list of return value registers.
3806 SmallVector<unsigned, 4> RetRegs;
3807
3808 if (Ret->getNumOperands() > 0) {
3809 CallingConv::ID CC = F.getCallingConv();
3810 SmallVector<ISD::OutputArg, 4> Outs;
3811 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3812
3813 // Analyze operands of the call, assigning locations to each operand.
3814 SmallVector<CCValAssign, 16> ValLocs;
3815 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3816 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3817 : RetCC_AArch64_AAPCS;
3818 CCInfo.AnalyzeReturn(Outs, RetCC);
3819
3820 // Only handle a single return value for now.
3821 if (ValLocs.size() != 1)
3822 return false;
3823
3824 CCValAssign &VA = ValLocs[0];
3825 const Value *RV = Ret->getOperand(0);
3826
3827 // Don't bother handling odd stuff for now.
3828 if ((VA.getLocInfo() != CCValAssign::Full) &&
3829 (VA.getLocInfo() != CCValAssign::BCvt))
3830 return false;
3831
3832 // Only handle register returns for now.
3833 if (!VA.isRegLoc())
3834 return false;
3835
3836 unsigned Reg = getRegForValue(RV);
3837 if (Reg == 0)
3838 return false;
3839
3840 unsigned SrcReg = Reg + VA.getValNo();
3841 unsigned DestReg = VA.getLocReg();
3842 // Avoid a cross-class copy. This is very unlikely.
3843 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3844 return false;
3845
3846 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3847 if (!RVEVT.isSimple())
3848 return false;
3849
3850 // Vectors (of > 1 lane) in big endian need tricky handling.
3851 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3852 !Subtarget->isLittleEndian())
3853 return false;
3854
3855 MVT RVVT = RVEVT.getSimpleVT();
3856 if (RVVT == MVT::f128)
3857 return false;
3858
3859 MVT DestVT = VA.getValVT();
3860 // Special handling for extended integers.
3861 if (RVVT != DestVT) {
3862 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3863 return false;
3864
3865 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3866 return false;
3867
3868 bool IsZExt = Outs[0].Flags.isZExt();
3869 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3870 if (SrcReg == 0)
3871 return false;
3872 }
3873
3874 // Make the copy.
3875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3876 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3877
3878 // Add register to return instruction.
3879 RetRegs.push_back(VA.getLocReg());
3880 }
3881
3882 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3883 TII.get(AArch64::RET_ReallyLR));
3884 for (unsigned RetReg : RetRegs)
3885 MIB.addReg(RetReg, RegState::Implicit);
3886 return true;
3887 }
3888
selectTrunc(const Instruction * I)3889 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3890 Type *DestTy = I->getType();
3891 Value *Op = I->getOperand(0);
3892 Type *SrcTy = Op->getType();
3893
3894 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3895 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3896 if (!SrcEVT.isSimple())
3897 return false;
3898 if (!DestEVT.isSimple())
3899 return false;
3900
3901 MVT SrcVT = SrcEVT.getSimpleVT();
3902 MVT DestVT = DestEVT.getSimpleVT();
3903
3904 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3905 SrcVT != MVT::i8)
3906 return false;
3907 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3908 DestVT != MVT::i1)
3909 return false;
3910
3911 unsigned SrcReg = getRegForValue(Op);
3912 if (!SrcReg)
3913 return false;
3914 bool SrcIsKill = hasTrivialKill(Op);
3915
3916 // If we're truncating from i64 to a smaller non-legal type then generate an
3917 // AND. Otherwise, we know the high bits are undefined and a truncate only
3918 // generate a COPY. We cannot mark the source register also as result
3919 // register, because this can incorrectly transfer the kill flag onto the
3920 // source register.
3921 unsigned ResultReg;
3922 if (SrcVT == MVT::i64) {
3923 uint64_t Mask = 0;
3924 switch (DestVT.SimpleTy) {
3925 default:
3926 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3927 return false;
3928 case MVT::i1:
3929 Mask = 0x1;
3930 break;
3931 case MVT::i8:
3932 Mask = 0xff;
3933 break;
3934 case MVT::i16:
3935 Mask = 0xffff;
3936 break;
3937 }
3938 // Issue an extract_subreg to get the lower 32-bits.
3939 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3940 AArch64::sub_32);
3941 // Create the AND instruction which performs the actual truncation.
3942 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3943 assert(ResultReg && "Unexpected AND instruction emission failure.");
3944 } else {
3945 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3946 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3947 TII.get(TargetOpcode::COPY), ResultReg)
3948 .addReg(SrcReg, getKillRegState(SrcIsKill));
3949 }
3950
3951 updateValueMap(I, ResultReg);
3952 return true;
3953 }
3954
emiti1Ext(unsigned SrcReg,MVT DestVT,bool IsZExt)3955 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3956 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3957 DestVT == MVT::i64) &&
3958 "Unexpected value type.");
3959 // Handle i8 and i16 as i32.
3960 if (DestVT == MVT::i8 || DestVT == MVT::i16)
3961 DestVT = MVT::i32;
3962
3963 if (IsZExt) {
3964 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3965 assert(ResultReg && "Unexpected AND instruction emission failure.");
3966 if (DestVT == MVT::i64) {
3967 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3968 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3969 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3970 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3971 TII.get(AArch64::SUBREG_TO_REG), Reg64)
3972 .addImm(0)
3973 .addReg(ResultReg)
3974 .addImm(AArch64::sub_32);
3975 ResultReg = Reg64;
3976 }
3977 return ResultReg;
3978 } else {
3979 if (DestVT == MVT::i64) {
3980 // FIXME: We're SExt i1 to i64.
3981 return 0;
3982 }
3983 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3984 /*TODO:IsKill=*/false, 0, 0);
3985 }
3986 }
3987
emitMul_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)3988 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3989 unsigned Op1, bool Op1IsKill) {
3990 unsigned Opc, ZReg;
3991 switch (RetVT.SimpleTy) {
3992 default: return 0;
3993 case MVT::i8:
3994 case MVT::i16:
3995 case MVT::i32:
3996 RetVT = MVT::i32;
3997 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3998 case MVT::i64:
3999 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4000 }
4001
4002 const TargetRegisterClass *RC =
4003 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4004 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4005 /*IsKill=*/ZReg, true);
4006 }
4007
emitSMULL_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)4008 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4009 unsigned Op1, bool Op1IsKill) {
4010 if (RetVT != MVT::i64)
4011 return 0;
4012
4013 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4014 Op0, Op0IsKill, Op1, Op1IsKill,
4015 AArch64::XZR, /*IsKill=*/true);
4016 }
4017
emitUMULL_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)4018 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4019 unsigned Op1, bool Op1IsKill) {
4020 if (RetVT != MVT::i64)
4021 return 0;
4022
4023 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4024 Op0, Op0IsKill, Op1, Op1IsKill,
4025 AArch64::XZR, /*IsKill=*/true);
4026 }
4027
emitLSL_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4028 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4029 unsigned Op1Reg, bool Op1IsKill) {
4030 unsigned Opc = 0;
4031 bool NeedTrunc = false;
4032 uint64_t Mask = 0;
4033 switch (RetVT.SimpleTy) {
4034 default: return 0;
4035 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4036 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4037 case MVT::i32: Opc = AArch64::LSLVWr; break;
4038 case MVT::i64: Opc = AArch64::LSLVXr; break;
4039 }
4040
4041 const TargetRegisterClass *RC =
4042 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4043 if (NeedTrunc) {
4044 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4045 Op1IsKill = true;
4046 }
4047 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4048 Op1IsKill);
4049 if (NeedTrunc)
4050 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4051 return ResultReg;
4052 }
4053
emitLSL_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4054 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4055 bool Op0IsKill, uint64_t Shift,
4056 bool IsZExt) {
4057 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4058 "Unexpected source/return type pair.");
4059 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4060 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4061 "Unexpected source value type.");
4062 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4063 RetVT == MVT::i64) && "Unexpected return value type.");
4064
4065 bool Is64Bit = (RetVT == MVT::i64);
4066 unsigned RegSize = Is64Bit ? 64 : 32;
4067 unsigned DstBits = RetVT.getSizeInBits();
4068 unsigned SrcBits = SrcVT.getSizeInBits();
4069 const TargetRegisterClass *RC =
4070 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4071
4072 // Just emit a copy for "zero" shifts.
4073 if (Shift == 0) {
4074 if (RetVT == SrcVT) {
4075 unsigned ResultReg = createResultReg(RC);
4076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4077 TII.get(TargetOpcode::COPY), ResultReg)
4078 .addReg(Op0, getKillRegState(Op0IsKill));
4079 return ResultReg;
4080 } else
4081 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4082 }
4083
4084 // Don't deal with undefined shifts.
4085 if (Shift >= DstBits)
4086 return 0;
4087
4088 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4089 // {S|U}BFM Wd, Wn, #r, #s
4090 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4091
4092 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4093 // %2 = shl i16 %1, 4
4094 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4095 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4096 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4097 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4098
4099 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4100 // %2 = shl i16 %1, 8
4101 // Wd<32+7-24,32-24> = Wn<7:0>
4102 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4103 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4104 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4105
4106 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4107 // %2 = shl i16 %1, 12
4108 // Wd<32+3-20,32-20> = Wn<3:0>
4109 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4110 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4111 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4112
4113 unsigned ImmR = RegSize - Shift;
4114 // Limit the width to the length of the source type.
4115 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4116 static const unsigned OpcTable[2][2] = {
4117 {AArch64::SBFMWri, AArch64::SBFMXri},
4118 {AArch64::UBFMWri, AArch64::UBFMXri}
4119 };
4120 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4121 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4122 unsigned TmpReg = MRI.createVirtualRegister(RC);
4123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4124 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4125 .addImm(0)
4126 .addReg(Op0, getKillRegState(Op0IsKill))
4127 .addImm(AArch64::sub_32);
4128 Op0 = TmpReg;
4129 Op0IsKill = true;
4130 }
4131 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4132 }
4133
emitLSR_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4134 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4135 unsigned Op1Reg, bool Op1IsKill) {
4136 unsigned Opc = 0;
4137 bool NeedTrunc = false;
4138 uint64_t Mask = 0;
4139 switch (RetVT.SimpleTy) {
4140 default: return 0;
4141 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4142 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4143 case MVT::i32: Opc = AArch64::LSRVWr; break;
4144 case MVT::i64: Opc = AArch64::LSRVXr; break;
4145 }
4146
4147 const TargetRegisterClass *RC =
4148 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4149 if (NeedTrunc) {
4150 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4151 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4152 Op0IsKill = Op1IsKill = true;
4153 }
4154 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4155 Op1IsKill);
4156 if (NeedTrunc)
4157 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4158 return ResultReg;
4159 }
4160
emitLSR_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4161 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4162 bool Op0IsKill, uint64_t Shift,
4163 bool IsZExt) {
4164 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4165 "Unexpected source/return type pair.");
4166 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4167 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4168 "Unexpected source value type.");
4169 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4170 RetVT == MVT::i64) && "Unexpected return value type.");
4171
4172 bool Is64Bit = (RetVT == MVT::i64);
4173 unsigned RegSize = Is64Bit ? 64 : 32;
4174 unsigned DstBits = RetVT.getSizeInBits();
4175 unsigned SrcBits = SrcVT.getSizeInBits();
4176 const TargetRegisterClass *RC =
4177 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4178
4179 // Just emit a copy for "zero" shifts.
4180 if (Shift == 0) {
4181 if (RetVT == SrcVT) {
4182 unsigned ResultReg = createResultReg(RC);
4183 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4184 TII.get(TargetOpcode::COPY), ResultReg)
4185 .addReg(Op0, getKillRegState(Op0IsKill));
4186 return ResultReg;
4187 } else
4188 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4189 }
4190
4191 // Don't deal with undefined shifts.
4192 if (Shift >= DstBits)
4193 return 0;
4194
4195 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4196 // {S|U}BFM Wd, Wn, #r, #s
4197 // Wd<s-r:0> = Wn<s:r> when r <= s
4198
4199 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4200 // %2 = lshr i16 %1, 4
4201 // Wd<7-4:0> = Wn<7:4>
4202 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4203 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4204 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4205
4206 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4207 // %2 = lshr i16 %1, 8
4208 // Wd<7-7,0> = Wn<7:7>
4209 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4210 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4211 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4212
4213 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4214 // %2 = lshr i16 %1, 12
4215 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4216 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4217 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4218 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4219
4220 if (Shift >= SrcBits && IsZExt)
4221 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4222
4223 // It is not possible to fold a sign-extend into the LShr instruction. In this
4224 // case emit a sign-extend.
4225 if (!IsZExt) {
4226 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4227 if (!Op0)
4228 return 0;
4229 Op0IsKill = true;
4230 SrcVT = RetVT;
4231 SrcBits = SrcVT.getSizeInBits();
4232 IsZExt = true;
4233 }
4234
4235 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4236 unsigned ImmS = SrcBits - 1;
4237 static const unsigned OpcTable[2][2] = {
4238 {AArch64::SBFMWri, AArch64::SBFMXri},
4239 {AArch64::UBFMWri, AArch64::UBFMXri}
4240 };
4241 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4242 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4243 unsigned TmpReg = MRI.createVirtualRegister(RC);
4244 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4245 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4246 .addImm(0)
4247 .addReg(Op0, getKillRegState(Op0IsKill))
4248 .addImm(AArch64::sub_32);
4249 Op0 = TmpReg;
4250 Op0IsKill = true;
4251 }
4252 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4253 }
4254
emitASR_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4255 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4256 unsigned Op1Reg, bool Op1IsKill) {
4257 unsigned Opc = 0;
4258 bool NeedTrunc = false;
4259 uint64_t Mask = 0;
4260 switch (RetVT.SimpleTy) {
4261 default: return 0;
4262 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4263 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4264 case MVT::i32: Opc = AArch64::ASRVWr; break;
4265 case MVT::i64: Opc = AArch64::ASRVXr; break;
4266 }
4267
4268 const TargetRegisterClass *RC =
4269 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4270 if (NeedTrunc) {
4271 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4272 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4273 Op0IsKill = Op1IsKill = true;
4274 }
4275 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4276 Op1IsKill);
4277 if (NeedTrunc)
4278 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4279 return ResultReg;
4280 }
4281
emitASR_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4282 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4283 bool Op0IsKill, uint64_t Shift,
4284 bool IsZExt) {
4285 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4286 "Unexpected source/return type pair.");
4287 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4288 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4289 "Unexpected source value type.");
4290 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4291 RetVT == MVT::i64) && "Unexpected return value type.");
4292
4293 bool Is64Bit = (RetVT == MVT::i64);
4294 unsigned RegSize = Is64Bit ? 64 : 32;
4295 unsigned DstBits = RetVT.getSizeInBits();
4296 unsigned SrcBits = SrcVT.getSizeInBits();
4297 const TargetRegisterClass *RC =
4298 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4299
4300 // Just emit a copy for "zero" shifts.
4301 if (Shift == 0) {
4302 if (RetVT == SrcVT) {
4303 unsigned ResultReg = createResultReg(RC);
4304 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4305 TII.get(TargetOpcode::COPY), ResultReg)
4306 .addReg(Op0, getKillRegState(Op0IsKill));
4307 return ResultReg;
4308 } else
4309 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4310 }
4311
4312 // Don't deal with undefined shifts.
4313 if (Shift >= DstBits)
4314 return 0;
4315
4316 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4317 // {S|U}BFM Wd, Wn, #r, #s
4318 // Wd<s-r:0> = Wn<s:r> when r <= s
4319
4320 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4321 // %2 = ashr i16 %1, 4
4322 // Wd<7-4:0> = Wn<7:4>
4323 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4324 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4325 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4326
4327 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4328 // %2 = ashr i16 %1, 8
4329 // Wd<7-7,0> = Wn<7:7>
4330 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4331 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4332 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4333
4334 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4335 // %2 = ashr i16 %1, 12
4336 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4337 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4338 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4339 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4340
4341 if (Shift >= SrcBits && IsZExt)
4342 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4343
4344 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4345 unsigned ImmS = SrcBits - 1;
4346 static const unsigned OpcTable[2][2] = {
4347 {AArch64::SBFMWri, AArch64::SBFMXri},
4348 {AArch64::UBFMWri, AArch64::UBFMXri}
4349 };
4350 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4351 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4352 unsigned TmpReg = MRI.createVirtualRegister(RC);
4353 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4354 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4355 .addImm(0)
4356 .addReg(Op0, getKillRegState(Op0IsKill))
4357 .addImm(AArch64::sub_32);
4358 Op0 = TmpReg;
4359 Op0IsKill = true;
4360 }
4361 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4362 }
4363
emitIntExt(MVT SrcVT,unsigned SrcReg,MVT DestVT,bool IsZExt)4364 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4365 bool IsZExt) {
4366 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4367
4368 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4369 // DestVT are odd things, so test to make sure that they are both types we can
4370 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4371 // bail out to SelectionDAG.
4372 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4373 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4374 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4375 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4376 return 0;
4377
4378 unsigned Opc;
4379 unsigned Imm = 0;
4380
4381 switch (SrcVT.SimpleTy) {
4382 default:
4383 return 0;
4384 case MVT::i1:
4385 return emiti1Ext(SrcReg, DestVT, IsZExt);
4386 case MVT::i8:
4387 if (DestVT == MVT::i64)
4388 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4389 else
4390 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4391 Imm = 7;
4392 break;
4393 case MVT::i16:
4394 if (DestVT == MVT::i64)
4395 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4396 else
4397 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4398 Imm = 15;
4399 break;
4400 case MVT::i32:
4401 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4402 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4403 Imm = 31;
4404 break;
4405 }
4406
4407 // Handle i8 and i16 as i32.
4408 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4409 DestVT = MVT::i32;
4410 else if (DestVT == MVT::i64) {
4411 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4412 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4413 TII.get(AArch64::SUBREG_TO_REG), Src64)
4414 .addImm(0)
4415 .addReg(SrcReg)
4416 .addImm(AArch64::sub_32);
4417 SrcReg = Src64;
4418 }
4419
4420 const TargetRegisterClass *RC =
4421 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4422 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4423 }
4424
isZExtLoad(const MachineInstr * LI)4425 static bool isZExtLoad(const MachineInstr *LI) {
4426 switch (LI->getOpcode()) {
4427 default:
4428 return false;
4429 case AArch64::LDURBBi:
4430 case AArch64::LDURHHi:
4431 case AArch64::LDURWi:
4432 case AArch64::LDRBBui:
4433 case AArch64::LDRHHui:
4434 case AArch64::LDRWui:
4435 case AArch64::LDRBBroX:
4436 case AArch64::LDRHHroX:
4437 case AArch64::LDRWroX:
4438 case AArch64::LDRBBroW:
4439 case AArch64::LDRHHroW:
4440 case AArch64::LDRWroW:
4441 return true;
4442 }
4443 }
4444
isSExtLoad(const MachineInstr * LI)4445 static bool isSExtLoad(const MachineInstr *LI) {
4446 switch (LI->getOpcode()) {
4447 default:
4448 return false;
4449 case AArch64::LDURSBWi:
4450 case AArch64::LDURSHWi:
4451 case AArch64::LDURSBXi:
4452 case AArch64::LDURSHXi:
4453 case AArch64::LDURSWi:
4454 case AArch64::LDRSBWui:
4455 case AArch64::LDRSHWui:
4456 case AArch64::LDRSBXui:
4457 case AArch64::LDRSHXui:
4458 case AArch64::LDRSWui:
4459 case AArch64::LDRSBWroX:
4460 case AArch64::LDRSHWroX:
4461 case AArch64::LDRSBXroX:
4462 case AArch64::LDRSHXroX:
4463 case AArch64::LDRSWroX:
4464 case AArch64::LDRSBWroW:
4465 case AArch64::LDRSHWroW:
4466 case AArch64::LDRSBXroW:
4467 case AArch64::LDRSHXroW:
4468 case AArch64::LDRSWroW:
4469 return true;
4470 }
4471 }
4472
optimizeIntExtLoad(const Instruction * I,MVT RetVT,MVT SrcVT)4473 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4474 MVT SrcVT) {
4475 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4476 if (!LI || !LI->hasOneUse())
4477 return false;
4478
4479 // Check if the load instruction has already been selected.
4480 unsigned Reg = lookUpRegForValue(LI);
4481 if (!Reg)
4482 return false;
4483
4484 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4485 if (!MI)
4486 return false;
4487
4488 // Check if the correct load instruction has been emitted - SelectionDAG might
4489 // have emitted a zero-extending load, but we need a sign-extending load.
4490 bool IsZExt = isa<ZExtInst>(I);
4491 const auto *LoadMI = MI;
4492 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4493 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4494 unsigned LoadReg = MI->getOperand(1).getReg();
4495 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4496 assert(LoadMI && "Expected valid instruction");
4497 }
4498 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4499 return false;
4500
4501 // Nothing to be done.
4502 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4503 updateValueMap(I, Reg);
4504 return true;
4505 }
4506
4507 if (IsZExt) {
4508 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4509 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4510 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4511 .addImm(0)
4512 .addReg(Reg, getKillRegState(true))
4513 .addImm(AArch64::sub_32);
4514 Reg = Reg64;
4515 } else {
4516 assert((MI->getOpcode() == TargetOpcode::COPY &&
4517 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4518 "Expected copy instruction");
4519 Reg = MI->getOperand(1).getReg();
4520 MachineBasicBlock::iterator I(MI);
4521 removeDeadCode(I, std::next(I));
4522 }
4523 updateValueMap(I, Reg);
4524 return true;
4525 }
4526
selectIntExt(const Instruction * I)4527 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4528 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4529 "Unexpected integer extend instruction.");
4530 MVT RetVT;
4531 MVT SrcVT;
4532 if (!isTypeSupported(I->getType(), RetVT))
4533 return false;
4534
4535 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4536 return false;
4537
4538 // Try to optimize already sign-/zero-extended values from load instructions.
4539 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4540 return true;
4541
4542 unsigned SrcReg = getRegForValue(I->getOperand(0));
4543 if (!SrcReg)
4544 return false;
4545 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4546
4547 // Try to optimize already sign-/zero-extended values from function arguments.
4548 bool IsZExt = isa<ZExtInst>(I);
4549 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4550 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4551 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4552 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4553 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4554 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4555 .addImm(0)
4556 .addReg(SrcReg, getKillRegState(SrcIsKill))
4557 .addImm(AArch64::sub_32);
4558 SrcReg = ResultReg;
4559 }
4560 // Conservatively clear all kill flags from all uses, because we are
4561 // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4562 // level. The result of the instruction at IR level might have been
4563 // trivially dead, which is now not longer true.
4564 unsigned UseReg = lookUpRegForValue(I);
4565 if (UseReg)
4566 MRI.clearKillFlags(UseReg);
4567
4568 updateValueMap(I, SrcReg);
4569 return true;
4570 }
4571 }
4572
4573 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4574 if (!ResultReg)
4575 return false;
4576
4577 updateValueMap(I, ResultReg);
4578 return true;
4579 }
4580
selectRem(const Instruction * I,unsigned ISDOpcode)4581 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4582 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4583 if (!DestEVT.isSimple())
4584 return false;
4585
4586 MVT DestVT = DestEVT.getSimpleVT();
4587 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4588 return false;
4589
4590 unsigned DivOpc;
4591 bool Is64bit = (DestVT == MVT::i64);
4592 switch (ISDOpcode) {
4593 default:
4594 return false;
4595 case ISD::SREM:
4596 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4597 break;
4598 case ISD::UREM:
4599 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4600 break;
4601 }
4602 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4603 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4604 if (!Src0Reg)
4605 return false;
4606 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4607
4608 unsigned Src1Reg = getRegForValue(I->getOperand(1));
4609 if (!Src1Reg)
4610 return false;
4611 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4612
4613 const TargetRegisterClass *RC =
4614 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4615 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4616 Src1Reg, /*IsKill=*/false);
4617 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4618 // The remainder is computed as numerator - (quotient * denominator) using the
4619 // MSUB instruction.
4620 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4621 Src1Reg, Src1IsKill, Src0Reg,
4622 Src0IsKill);
4623 updateValueMap(I, ResultReg);
4624 return true;
4625 }
4626
selectMul(const Instruction * I)4627 bool AArch64FastISel::selectMul(const Instruction *I) {
4628 MVT VT;
4629 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4630 return false;
4631
4632 if (VT.isVector())
4633 return selectBinaryOp(I, ISD::MUL);
4634
4635 const Value *Src0 = I->getOperand(0);
4636 const Value *Src1 = I->getOperand(1);
4637 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4638 if (C->getValue().isPowerOf2())
4639 std::swap(Src0, Src1);
4640
4641 // Try to simplify to a shift instruction.
4642 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4643 if (C->getValue().isPowerOf2()) {
4644 uint64_t ShiftVal = C->getValue().logBase2();
4645 MVT SrcVT = VT;
4646 bool IsZExt = true;
4647 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4648 if (!isIntExtFree(ZExt)) {
4649 MVT VT;
4650 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4651 SrcVT = VT;
4652 IsZExt = true;
4653 Src0 = ZExt->getOperand(0);
4654 }
4655 }
4656 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4657 if (!isIntExtFree(SExt)) {
4658 MVT VT;
4659 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4660 SrcVT = VT;
4661 IsZExt = false;
4662 Src0 = SExt->getOperand(0);
4663 }
4664 }
4665 }
4666
4667 unsigned Src0Reg = getRegForValue(Src0);
4668 if (!Src0Reg)
4669 return false;
4670 bool Src0IsKill = hasTrivialKill(Src0);
4671
4672 unsigned ResultReg =
4673 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4674
4675 if (ResultReg) {
4676 updateValueMap(I, ResultReg);
4677 return true;
4678 }
4679 }
4680
4681 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4682 if (!Src0Reg)
4683 return false;
4684 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4685
4686 unsigned Src1Reg = getRegForValue(I->getOperand(1));
4687 if (!Src1Reg)
4688 return false;
4689 bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4690
4691 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4692
4693 if (!ResultReg)
4694 return false;
4695
4696 updateValueMap(I, ResultReg);
4697 return true;
4698 }
4699
selectShift(const Instruction * I)4700 bool AArch64FastISel::selectShift(const Instruction *I) {
4701 MVT RetVT;
4702 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4703 return false;
4704
4705 if (RetVT.isVector())
4706 return selectOperator(I, I->getOpcode());
4707
4708 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4709 unsigned ResultReg = 0;
4710 uint64_t ShiftVal = C->getZExtValue();
4711 MVT SrcVT = RetVT;
4712 bool IsZExt = I->getOpcode() != Instruction::AShr;
4713 const Value *Op0 = I->getOperand(0);
4714 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4715 if (!isIntExtFree(ZExt)) {
4716 MVT TmpVT;
4717 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4718 SrcVT = TmpVT;
4719 IsZExt = true;
4720 Op0 = ZExt->getOperand(0);
4721 }
4722 }
4723 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4724 if (!isIntExtFree(SExt)) {
4725 MVT TmpVT;
4726 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4727 SrcVT = TmpVT;
4728 IsZExt = false;
4729 Op0 = SExt->getOperand(0);
4730 }
4731 }
4732 }
4733
4734 unsigned Op0Reg = getRegForValue(Op0);
4735 if (!Op0Reg)
4736 return false;
4737 bool Op0IsKill = hasTrivialKill(Op0);
4738
4739 switch (I->getOpcode()) {
4740 default: llvm_unreachable("Unexpected instruction.");
4741 case Instruction::Shl:
4742 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4743 break;
4744 case Instruction::AShr:
4745 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4746 break;
4747 case Instruction::LShr:
4748 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4749 break;
4750 }
4751 if (!ResultReg)
4752 return false;
4753
4754 updateValueMap(I, ResultReg);
4755 return true;
4756 }
4757
4758 unsigned Op0Reg = getRegForValue(I->getOperand(0));
4759 if (!Op0Reg)
4760 return false;
4761 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4762
4763 unsigned Op1Reg = getRegForValue(I->getOperand(1));
4764 if (!Op1Reg)
4765 return false;
4766 bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4767
4768 unsigned ResultReg = 0;
4769 switch (I->getOpcode()) {
4770 default: llvm_unreachable("Unexpected instruction.");
4771 case Instruction::Shl:
4772 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4773 break;
4774 case Instruction::AShr:
4775 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4776 break;
4777 case Instruction::LShr:
4778 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4779 break;
4780 }
4781
4782 if (!ResultReg)
4783 return false;
4784
4785 updateValueMap(I, ResultReg);
4786 return true;
4787 }
4788
selectBitCast(const Instruction * I)4789 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4790 MVT RetVT, SrcVT;
4791
4792 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4793 return false;
4794 if (!isTypeLegal(I->getType(), RetVT))
4795 return false;
4796
4797 unsigned Opc;
4798 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4799 Opc = AArch64::FMOVWSr;
4800 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4801 Opc = AArch64::FMOVXDr;
4802 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4803 Opc = AArch64::FMOVSWr;
4804 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4805 Opc = AArch64::FMOVDXr;
4806 else
4807 return false;
4808
4809 const TargetRegisterClass *RC = nullptr;
4810 switch (RetVT.SimpleTy) {
4811 default: llvm_unreachable("Unexpected value type.");
4812 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4813 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4814 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4815 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4816 }
4817 unsigned Op0Reg = getRegForValue(I->getOperand(0));
4818 if (!Op0Reg)
4819 return false;
4820 bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4821 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4822
4823 if (!ResultReg)
4824 return false;
4825
4826 updateValueMap(I, ResultReg);
4827 return true;
4828 }
4829
selectFRem(const Instruction * I)4830 bool AArch64FastISel::selectFRem(const Instruction *I) {
4831 MVT RetVT;
4832 if (!isTypeLegal(I->getType(), RetVT))
4833 return false;
4834
4835 RTLIB::Libcall LC;
4836 switch (RetVT.SimpleTy) {
4837 default:
4838 return false;
4839 case MVT::f32:
4840 LC = RTLIB::REM_F32;
4841 break;
4842 case MVT::f64:
4843 LC = RTLIB::REM_F64;
4844 break;
4845 }
4846
4847 ArgListTy Args;
4848 Args.reserve(I->getNumOperands());
4849
4850 // Populate the argument list.
4851 for (auto &Arg : I->operands()) {
4852 ArgListEntry Entry;
4853 Entry.Val = Arg;
4854 Entry.Ty = Arg->getType();
4855 Args.push_back(Entry);
4856 }
4857
4858 CallLoweringInfo CLI;
4859 MCContext &Ctx = MF->getContext();
4860 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4861 TLI.getLibcallName(LC), std::move(Args));
4862 if (!lowerCallTo(CLI))
4863 return false;
4864 updateValueMap(I, CLI.ResultReg);
4865 return true;
4866 }
4867
selectSDiv(const Instruction * I)4868 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4869 MVT VT;
4870 if (!isTypeLegal(I->getType(), VT))
4871 return false;
4872
4873 if (!isa<ConstantInt>(I->getOperand(1)))
4874 return selectBinaryOp(I, ISD::SDIV);
4875
4876 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4877 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4878 !(C.isPowerOf2() || (-C).isPowerOf2()))
4879 return selectBinaryOp(I, ISD::SDIV);
4880
4881 unsigned Lg2 = C.countTrailingZeros();
4882 unsigned Src0Reg = getRegForValue(I->getOperand(0));
4883 if (!Src0Reg)
4884 return false;
4885 bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4886
4887 if (cast<BinaryOperator>(I)->isExact()) {
4888 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4889 if (!ResultReg)
4890 return false;
4891 updateValueMap(I, ResultReg);
4892 return true;
4893 }
4894
4895 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4896 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4897 if (!AddReg)
4898 return false;
4899
4900 // (Src0 < 0) ? Pow2 - 1 : 0;
4901 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4902 return false;
4903
4904 unsigned SelectOpc;
4905 const TargetRegisterClass *RC;
4906 if (VT == MVT::i64) {
4907 SelectOpc = AArch64::CSELXr;
4908 RC = &AArch64::GPR64RegClass;
4909 } else {
4910 SelectOpc = AArch64::CSELWr;
4911 RC = &AArch64::GPR32RegClass;
4912 }
4913 unsigned SelectReg =
4914 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4915 Src0IsKill, AArch64CC::LT);
4916 if (!SelectReg)
4917 return false;
4918
4919 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4920 // negate the result.
4921 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4922 unsigned ResultReg;
4923 if (C.isNegative())
4924 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4925 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4926 else
4927 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4928
4929 if (!ResultReg)
4930 return false;
4931
4932 updateValueMap(I, ResultReg);
4933 return true;
4934 }
4935
4936 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4937 /// have to duplicate it for AArch64, because otherwise we would fail during the
4938 /// sign-extend emission.
getRegForGEPIndex(const Value * Idx)4939 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4940 unsigned IdxN = getRegForValue(Idx);
4941 if (IdxN == 0)
4942 // Unhandled operand. Halt "fast" selection and bail.
4943 return std::pair<unsigned, bool>(0, false);
4944
4945 bool IdxNIsKill = hasTrivialKill(Idx);
4946
4947 // If the index is smaller or larger than intptr_t, truncate or extend it.
4948 MVT PtrVT = TLI.getPointerTy(DL);
4949 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4950 if (IdxVT.bitsLT(PtrVT)) {
4951 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4952 IdxNIsKill = true;
4953 } else if (IdxVT.bitsGT(PtrVT))
4954 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4955 return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4956 }
4957
4958 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4959 /// duplicate it for AArch64, because otherwise we would bail out even for
4960 /// simple cases. This is because the standard fastEmit functions don't cover
4961 /// MUL at all and ADD is lowered very inefficientily.
selectGetElementPtr(const Instruction * I)4962 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4963 unsigned N = getRegForValue(I->getOperand(0));
4964 if (!N)
4965 return false;
4966 bool NIsKill = hasTrivialKill(I->getOperand(0));
4967
4968 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4969 // into a single N = N + TotalOffset.
4970 uint64_t TotalOffs = 0;
4971 MVT VT = TLI.getPointerTy(DL);
4972 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4973 GTI != E; ++GTI) {
4974 const Value *Idx = GTI.getOperand();
4975 if (auto *StTy = GTI.getStructTypeOrNull()) {
4976 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4977 // N = N + Offset
4978 if (Field)
4979 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4980 } else {
4981 Type *Ty = GTI.getIndexedType();
4982
4983 // If this is a constant subscript, handle it quickly.
4984 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4985 if (CI->isZero())
4986 continue;
4987 // N = N + Offset
4988 TotalOffs +=
4989 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4990 continue;
4991 }
4992 if (TotalOffs) {
4993 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4994 if (!N)
4995 return false;
4996 NIsKill = true;
4997 TotalOffs = 0;
4998 }
4999
5000 // N = N + Idx * ElementSize;
5001 uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5002 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5003 unsigned IdxN = Pair.first;
5004 bool IdxNIsKill = Pair.second;
5005 if (!IdxN)
5006 return false;
5007
5008 if (ElementSize != 1) {
5009 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5010 if (!C)
5011 return false;
5012 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5013 if (!IdxN)
5014 return false;
5015 IdxNIsKill = true;
5016 }
5017 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5018 if (!N)
5019 return false;
5020 }
5021 }
5022 if (TotalOffs) {
5023 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5024 if (!N)
5025 return false;
5026 }
5027 updateValueMap(I, N);
5028 return true;
5029 }
5030
selectAtomicCmpXchg(const AtomicCmpXchgInst * I)5031 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5032 assert(TM.getOptLevel() == CodeGenOpt::None &&
5033 "cmpxchg survived AtomicExpand at optlevel > -O0");
5034
5035 auto *RetPairTy = cast<StructType>(I->getType());
5036 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5037 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5038 "cmpxchg has a non-i1 status result");
5039
5040 MVT VT;
5041 if (!isTypeLegal(RetTy, VT))
5042 return false;
5043
5044 const TargetRegisterClass *ResRC;
5045 unsigned Opc, CmpOpc;
5046 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5047 // extractvalue selection doesn't support that.
5048 if (VT == MVT::i32) {
5049 Opc = AArch64::CMP_SWAP_32;
5050 CmpOpc = AArch64::SUBSWrs;
5051 ResRC = &AArch64::GPR32RegClass;
5052 } else if (VT == MVT::i64) {
5053 Opc = AArch64::CMP_SWAP_64;
5054 CmpOpc = AArch64::SUBSXrs;
5055 ResRC = &AArch64::GPR64RegClass;
5056 } else {
5057 return false;
5058 }
5059
5060 const MCInstrDesc &II = TII.get(Opc);
5061
5062 const unsigned AddrReg = constrainOperandRegClass(
5063 II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5064 const unsigned DesiredReg = constrainOperandRegClass(
5065 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5066 const unsigned NewReg = constrainOperandRegClass(
5067 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5068
5069 const unsigned ResultReg1 = createResultReg(ResRC);
5070 const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5071 const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5072
5073 // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5074 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5075 .addDef(ResultReg1)
5076 .addDef(ScratchReg)
5077 .addUse(AddrReg)
5078 .addUse(DesiredReg)
5079 .addUse(NewReg);
5080
5081 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5082 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5083 .addUse(ResultReg1)
5084 .addUse(DesiredReg)
5085 .addImm(0);
5086
5087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5088 .addDef(ResultReg2)
5089 .addUse(AArch64::WZR)
5090 .addUse(AArch64::WZR)
5091 .addImm(AArch64CC::NE);
5092
5093 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5094 updateValueMap(I, ResultReg1, 2);
5095 return true;
5096 }
5097
fastSelectInstruction(const Instruction * I)5098 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5099 switch (I->getOpcode()) {
5100 default:
5101 break;
5102 case Instruction::Add:
5103 case Instruction::Sub:
5104 return selectAddSub(I);
5105 case Instruction::Mul:
5106 return selectMul(I);
5107 case Instruction::SDiv:
5108 return selectSDiv(I);
5109 case Instruction::SRem:
5110 if (!selectBinaryOp(I, ISD::SREM))
5111 return selectRem(I, ISD::SREM);
5112 return true;
5113 case Instruction::URem:
5114 if (!selectBinaryOp(I, ISD::UREM))
5115 return selectRem(I, ISD::UREM);
5116 return true;
5117 case Instruction::Shl:
5118 case Instruction::LShr:
5119 case Instruction::AShr:
5120 return selectShift(I);
5121 case Instruction::And:
5122 case Instruction::Or:
5123 case Instruction::Xor:
5124 return selectLogicalOp(I);
5125 case Instruction::Br:
5126 return selectBranch(I);
5127 case Instruction::IndirectBr:
5128 return selectIndirectBr(I);
5129 case Instruction::BitCast:
5130 if (!FastISel::selectBitCast(I))
5131 return selectBitCast(I);
5132 return true;
5133 case Instruction::FPToSI:
5134 if (!selectCast(I, ISD::FP_TO_SINT))
5135 return selectFPToInt(I, /*Signed=*/true);
5136 return true;
5137 case Instruction::FPToUI:
5138 return selectFPToInt(I, /*Signed=*/false);
5139 case Instruction::ZExt:
5140 case Instruction::SExt:
5141 return selectIntExt(I);
5142 case Instruction::Trunc:
5143 if (!selectCast(I, ISD::TRUNCATE))
5144 return selectTrunc(I);
5145 return true;
5146 case Instruction::FPExt:
5147 return selectFPExt(I);
5148 case Instruction::FPTrunc:
5149 return selectFPTrunc(I);
5150 case Instruction::SIToFP:
5151 if (!selectCast(I, ISD::SINT_TO_FP))
5152 return selectIntToFP(I, /*Signed=*/true);
5153 return true;
5154 case Instruction::UIToFP:
5155 return selectIntToFP(I, /*Signed=*/false);
5156 case Instruction::Load:
5157 return selectLoad(I);
5158 case Instruction::Store:
5159 return selectStore(I);
5160 case Instruction::FCmp:
5161 case Instruction::ICmp:
5162 return selectCmp(I);
5163 case Instruction::Select:
5164 return selectSelect(I);
5165 case Instruction::Ret:
5166 return selectRet(I);
5167 case Instruction::FRem:
5168 return selectFRem(I);
5169 case Instruction::GetElementPtr:
5170 return selectGetElementPtr(I);
5171 case Instruction::AtomicCmpXchg:
5172 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5173 }
5174
5175 // Silence warnings.
5176 (void)&CC_AArch64_DarwinPCS_VarArg;
5177 (void)&CC_AArch64_Win64_VarArg;
5178
5179 // fall-back to target-independent instruction selection.
5180 return selectOperator(I, I->getOpcode());
5181 }
5182
5183 namespace llvm {
5184
createFastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186 const TargetLibraryInfo *LibInfo) {
5187 return new AArch64FastISel(FuncInfo, LibInfo);
5188 }
5189
5190 } // end namespace llvm
5191