1 //===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the AArch64-specific support for the FastISel class. Some 10 // of the target-specific code is generated by tablegen in the file 11 // AArch64GenFastISel.inc, which is #included here. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AArch64.h" 16 #include "AArch64CallingConvention.h" 17 #include "AArch64RegisterInfo.h" 18 #include "AArch64Subtarget.h" 19 #include "MCTargetDesc/AArch64AddressingModes.h" 20 #include "Utils/AArch64BaseInfo.h" 21 #include "llvm/ADT/APFloat.h" 22 #include "llvm/ADT/APInt.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/Analysis/BranchProbabilityInfo.h" 26 #include "llvm/CodeGen/CallingConvLower.h" 27 #include "llvm/CodeGen/FastISel.h" 28 #include "llvm/CodeGen/FunctionLoweringInfo.h" 29 #include "llvm/CodeGen/ISDOpcodes.h" 30 #include "llvm/CodeGen/MachineBasicBlock.h" 31 #include "llvm/CodeGen/MachineConstantPool.h" 32 #include "llvm/CodeGen/MachineFrameInfo.h" 33 #include "llvm/CodeGen/MachineInstr.h" 34 #include "llvm/CodeGen/MachineInstrBuilder.h" 35 #include "llvm/CodeGen/MachineMemOperand.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/CodeGen/RuntimeLibcalls.h" 38 #include "llvm/CodeGen/ValueTypes.h" 39 #include "llvm/IR/Argument.h" 40 #include "llvm/IR/Attributes.h" 41 #include "llvm/IR/BasicBlock.h" 42 #include "llvm/IR/CallingConv.h" 43 #include "llvm/IR/Constant.h" 44 #include "llvm/IR/Constants.h" 45 #include "llvm/IR/DataLayout.h" 46 #include "llvm/IR/DerivedTypes.h" 47 #include "llvm/IR/Function.h" 
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  /// Abstraction of a memory operand as used by load/store selection: the
  /// base is either a register or a frame index, optionally combined with an
  /// offset register (with shift/extend), a constant byte offset, and/or a
  /// global value.
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    // Discriminated by Kind: Reg is valid for RegBase, FI for FrameIndexBase.
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  /// LLVMContext of the function being selected; used to build IR constants.
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines. Each handles one IR instruction kind; returns true on
  // success, false to fall back to SelectionDAG.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines. The _rr/_ri/_rs/_rx suffixes select the
  // register-register, register-immediate, shifted-register, and
  // extended-register instruction forms respectively.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget =
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  // A load with a single use can fold the extend into the memory operation.
  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  // An argument already carrying the matching zeroext/signext attribute is
  // extended by the caller.
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) { 329 switch (VT.SimpleTy) { 330 default: 331 return 0; // invalid 332 case MVT::i1: // fall-through 333 case MVT::i8: 334 return 1; 335 case MVT::i16: 336 return 2; 337 case MVT::i32: // fall-through 338 case MVT::f32: 339 return 4; 340 case MVT::i64: // fall-through 341 case MVT::f64: 342 return 8; 343 } 344 } 345 346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 347 if (CC == CallingConv::WebKit_JS) 348 return CC_AArch64_WebKit_JS; 349 if (CC == CallingConv::GHC) 350 return CC_AArch64_GHC; 351 if (CC == CallingConv::CFGuard_Check) 352 return CC_AArch64_Win64_CFGuard_Check; 353 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 354 } 355 356 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 357 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && 358 "Alloca should always return a pointer."); 359 360 // Don't handle dynamic allocas. 361 if (!FuncInfo.StaticAllocaMap.count(AI)) 362 return 0; 363 364 DenseMap<const AllocaInst *, int>::iterator SI = 365 FuncInfo.StaticAllocaMap.find(AI); 366 367 if (SI != FuncInfo.StaticAllocaMap.end()) { 368 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 370 ResultReg) 371 .addFrameIndex(SI->second) 372 .addImm(0) 373 .addImm(0); 374 return ResultReg; 375 } 376 377 return 0; 378 } 379 380 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 381 if (VT > MVT::i64) 382 return 0; 383 384 if (!CI->isZero()) 385 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 386 387 // Create a copy from the zero register to materialize a "0" value. 388 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 389 : &AArch64::GPR32RegClass; 390 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 391 unsigned ResultReg = createResultReg(RC); 392 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 393 ResultReg).addReg(ZeroReg, getKillRegState(true)); 394 return ResultReg; 395 } 396 397 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 398 // Positive zero (+0.0) has to be materialized with a fmov from the zero 399 // register, because the immediate version of fmov cannot encode zero. 400 if (CFP->isNullValue()) 401 return fastMaterializeFloatZero(CFP); 402 403 if (VT != MVT::f32 && VT != MVT::f64) 404 return 0; 405 406 const APFloat Val = CFP->getValueAPF(); 407 bool Is64Bit = (VT == MVT::f64); 408 // This checks to see if we can use FMOV instructions to materialize 409 // a constant, otherwise we have to materialize via the constant pool. 410 int Imm = 411 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 412 if (Imm != -1) { 413 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 414 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 415 } 416 417 // For the MachO large code model materialize the FP constant in code. 418 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { 419 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 420 const TargetRegisterClass *RC = Is64Bit ? 421 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 422 423 unsigned TmpReg = createResultReg(RC); 424 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) 425 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 426 427 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 428 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 429 TII.get(TargetOpcode::COPY), ResultReg) 430 .addReg(TmpReg, getKillRegState(true)); 431 432 return ResultReg; 433 } 434 435 // Materialize via constant pool. MachineConstantPool wants an explicit 436 // alignment. 
437 Align Alignment = DL.getPrefTypeAlign(CFP->getType()); 438 439 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment); 440 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 442 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 443 444 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 445 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 447 .addReg(ADRPReg) 448 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 449 return ResultReg; 450 } 451 452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 453 // We can't handle thread-local variables quickly yet. 454 if (GV->isThreadLocal()) 455 return 0; 456 457 // MachO still uses GOT for large code-model accesses, but ELF requires 458 // movz/movk sequences, which FastISel doesn't handle yet. 
459 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) 460 return 0; 461 462 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 463 464 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); 465 if (!DestEVT.isSimple()) 466 return 0; 467 468 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 469 unsigned ResultReg; 470 471 if (OpFlags & AArch64II::MO_GOT) { 472 // ADRP + LDRX 473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 474 ADRPReg) 475 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 476 477 unsigned LdrOpc; 478 if (Subtarget->isTargetILP32()) { 479 ResultReg = createResultReg(&AArch64::GPR32RegClass); 480 LdrOpc = AArch64::LDRWui; 481 } else { 482 ResultReg = createResultReg(&AArch64::GPR64RegClass); 483 LdrOpc = AArch64::LDRXui; 484 } 485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), 486 ResultReg) 487 .addReg(ADRPReg) 488 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 489 AArch64II::MO_NC | OpFlags); 490 if (!Subtarget->isTargetILP32()) 491 return ResultReg; 492 493 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits 494 // so we must extend the result on ILP32. 
495 unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); 496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 497 TII.get(TargetOpcode::SUBREG_TO_REG)) 498 .addDef(Result64) 499 .addImm(0) 500 .addReg(ResultReg, RegState::Kill) 501 .addImm(AArch64::sub_32); 502 return Result64; 503 } else { 504 // ADRP + ADDX 505 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 506 ADRPReg) 507 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 508 509 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 511 ResultReg) 512 .addReg(ADRPReg) 513 .addGlobalAddress(GV, 0, 514 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) 515 .addImm(0); 516 } 517 return ResultReg; 518 } 519 520 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 521 EVT CEVT = TLI.getValueType(DL, C->getType(), true); 522 523 // Only handle simple types. 524 if (!CEVT.isSimple()) 525 return 0; 526 MVT VT = CEVT.getSimpleVT(); 527 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, 528 // 'null' pointers need to have a somewhat special treatment. 
529 if (isa<ConstantPointerNull>(C)) { 530 assert(VT == MVT::i64 && "Expected 64-bit pointers"); 531 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); 532 } 533 534 if (const auto *CI = dyn_cast<ConstantInt>(C)) 535 return materializeInt(CI, VT); 536 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 537 return materializeFP(CFP, VT); 538 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 539 return materializeGV(GV); 540 541 return 0; 542 } 543 544 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 545 assert(CFP->isNullValue() && 546 "Floating-point constant is not a positive zero."); 547 MVT VT; 548 if (!isTypeLegal(CFP->getType(), VT)) 549 return 0; 550 551 if (VT != MVT::f32 && VT != MVT::f64) 552 return 0; 553 554 bool Is64Bit = (VT == MVT::f64); 555 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 556 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 557 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); 558 } 559 560 /// Check if the multiply is by a power-of-2 constant. 561 static bool isMulPowOf2(const Value *I) { 562 if (const auto *MI = dyn_cast<MulOperator>(I)) { 563 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 564 if (C->getValue().isPowerOf2()) 565 return true; 566 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 567 if (C->getValue().isPowerOf2()) 568 return true; 569 } 570 return false; 571 } 572 573 // Computes the address to get to an object. 574 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 575 { 576 const User *U = nullptr; 577 unsigned Opcode = Instruction::UserOp1; 578 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 579 // Don't walk into other basic blocks unless the object is an alloca from 580 // another block, otherwise it may not have a virtual register assigned. 
581 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 582 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 583 Opcode = I->getOpcode(); 584 U = I; 585 } 586 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 587 Opcode = C->getOpcode(); 588 U = C; 589 } 590 591 if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) 592 if (Ty->getAddressSpace() > 255) 593 // Fast instruction selection doesn't support the special 594 // address spaces. 595 return false; 596 597 switch (Opcode) { 598 default: 599 break; 600 case Instruction::BitCast: 601 // Look through bitcasts. 602 return computeAddress(U->getOperand(0), Addr, Ty); 603 604 case Instruction::IntToPtr: 605 // Look past no-op inttoptrs. 606 if (TLI.getValueType(DL, U->getOperand(0)->getType()) == 607 TLI.getPointerTy(DL)) 608 return computeAddress(U->getOperand(0), Addr, Ty); 609 break; 610 611 case Instruction::PtrToInt: 612 // Look past no-op ptrtoints. 613 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 614 return computeAddress(U->getOperand(0), Addr, Ty); 615 break; 616 617 case Instruction::GetElementPtr: { 618 Address SavedAddr = Addr; 619 uint64_t TmpOffset = Addr.getOffset(); 620 621 // Iterate through the GEP folding the constants into offsets where 622 // we can. 623 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); 624 GTI != E; ++GTI) { 625 const Value *Op = GTI.getOperand(); 626 if (StructType *STy = GTI.getStructTypeOrNull()) { 627 const StructLayout *SL = DL.getStructLayout(STy); 628 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 629 TmpOffset += SL->getElementOffset(Idx); 630 } else { 631 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 632 while (true) { 633 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 634 // Constant-offset addressing. 635 TmpOffset += CI->getSExtValue() * S; 636 break; 637 } 638 if (canFoldAddIntoGEP(U, Op)) { 639 // A compatible add with a constant operand. Fold the constant. 
640 ConstantInt *CI = 641 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 642 TmpOffset += CI->getSExtValue() * S; 643 // Iterate on the other operand. 644 Op = cast<AddOperator>(Op)->getOperand(0); 645 continue; 646 } 647 // Unsupported 648 goto unsupported_gep; 649 } 650 } 651 } 652 653 // Try to grab the base operand now. 654 Addr.setOffset(TmpOffset); 655 if (computeAddress(U->getOperand(0), Addr, Ty)) 656 return true; 657 658 // We failed, restore everything and try the other options. 659 Addr = SavedAddr; 660 661 unsupported_gep: 662 break; 663 } 664 case Instruction::Alloca: { 665 const AllocaInst *AI = cast<AllocaInst>(Obj); 666 DenseMap<const AllocaInst *, int>::iterator SI = 667 FuncInfo.StaticAllocaMap.find(AI); 668 if (SI != FuncInfo.StaticAllocaMap.end()) { 669 Addr.setKind(Address::FrameIndexBase); 670 Addr.setFI(SI->second); 671 return true; 672 } 673 break; 674 } 675 case Instruction::Add: { 676 // Adds of constants are common and easy enough. 677 const Value *LHS = U->getOperand(0); 678 const Value *RHS = U->getOperand(1); 679 680 if (isa<ConstantInt>(LHS)) 681 std::swap(LHS, RHS); 682 683 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 684 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 685 return computeAddress(LHS, Addr, Ty); 686 } 687 688 Address Backup = Addr; 689 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 690 return true; 691 Addr = Backup; 692 693 break; 694 } 695 case Instruction::Sub: { 696 // Subs of constants are common and easy enough. 
697 const Value *LHS = U->getOperand(0); 698 const Value *RHS = U->getOperand(1); 699 700 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 701 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 702 return computeAddress(LHS, Addr, Ty); 703 } 704 break; 705 } 706 case Instruction::Shl: { 707 if (Addr.getOffsetReg()) 708 break; 709 710 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 711 if (!CI) 712 break; 713 714 unsigned Val = CI->getZExtValue(); 715 if (Val < 1 || Val > 3) 716 break; 717 718 uint64_t NumBytes = 0; 719 if (Ty && Ty->isSized()) { 720 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 721 NumBytes = NumBits / 8; 722 if (!isPowerOf2_64(NumBits)) 723 NumBytes = 0; 724 } 725 726 if (NumBytes != (1ULL << Val)) 727 break; 728 729 Addr.setShift(Val); 730 Addr.setExtendType(AArch64_AM::LSL); 731 732 const Value *Src = U->getOperand(0); 733 if (const auto *I = dyn_cast<Instruction>(Src)) { 734 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 735 // Fold the zext or sext when it won't become a noop. 
736 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 737 if (!isIntExtFree(ZE) && 738 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 739 Addr.setExtendType(AArch64_AM::UXTW); 740 Src = ZE->getOperand(0); 741 } 742 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 743 if (!isIntExtFree(SE) && 744 SE->getOperand(0)->getType()->isIntegerTy(32)) { 745 Addr.setExtendType(AArch64_AM::SXTW); 746 Src = SE->getOperand(0); 747 } 748 } 749 } 750 } 751 752 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 753 if (AI->getOpcode() == Instruction::And) { 754 const Value *LHS = AI->getOperand(0); 755 const Value *RHS = AI->getOperand(1); 756 757 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 758 if (C->getValue() == 0xffffffff) 759 std::swap(LHS, RHS); 760 761 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 762 if (C->getValue() == 0xffffffff) { 763 Addr.setExtendType(AArch64_AM::UXTW); 764 unsigned Reg = getRegForValue(LHS); 765 if (!Reg) 766 return false; 767 bool RegIsKill = hasTrivialKill(LHS); 768 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 769 AArch64::sub_32); 770 Addr.setOffsetReg(Reg); 771 return true; 772 } 773 } 774 775 unsigned Reg = getRegForValue(Src); 776 if (!Reg) 777 return false; 778 Addr.setOffsetReg(Reg); 779 return true; 780 } 781 case Instruction::Mul: { 782 if (Addr.getOffsetReg()) 783 break; 784 785 if (!isMulPowOf2(U)) 786 break; 787 788 const Value *LHS = U->getOperand(0); 789 const Value *RHS = U->getOperand(1); 790 791 // Canonicalize power-of-2 value to the RHS. 
792 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 793 if (C->getValue().isPowerOf2()) 794 std::swap(LHS, RHS); 795 796 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); 797 const auto *C = cast<ConstantInt>(RHS); 798 unsigned Val = C->getValue().logBase2(); 799 if (Val < 1 || Val > 3) 800 break; 801 802 uint64_t NumBytes = 0; 803 if (Ty && Ty->isSized()) { 804 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 805 NumBytes = NumBits / 8; 806 if (!isPowerOf2_64(NumBits)) 807 NumBytes = 0; 808 } 809 810 if (NumBytes != (1ULL << Val)) 811 break; 812 813 Addr.setShift(Val); 814 Addr.setExtendType(AArch64_AM::LSL); 815 816 const Value *Src = LHS; 817 if (const auto *I = dyn_cast<Instruction>(Src)) { 818 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 819 // Fold the zext or sext when it won't become a noop. 820 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 821 if (!isIntExtFree(ZE) && 822 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 823 Addr.setExtendType(AArch64_AM::UXTW); 824 Src = ZE->getOperand(0); 825 } 826 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 827 if (!isIntExtFree(SE) && 828 SE->getOperand(0)->getType()->isIntegerTy(32)) { 829 Addr.setExtendType(AArch64_AM::SXTW); 830 Src = SE->getOperand(0); 831 } 832 } 833 } 834 } 835 836 unsigned Reg = getRegForValue(Src); 837 if (!Reg) 838 return false; 839 Addr.setOffsetReg(Reg); 840 return true; 841 } 842 case Instruction::And: { 843 if (Addr.getOffsetReg()) 844 break; 845 846 if (!Ty || DL.getTypeSizeInBits(Ty) != 8) 847 break; 848 849 const Value *LHS = U->getOperand(0); 850 const Value *RHS = U->getOperand(1); 851 852 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 853 if (C->getValue() == 0xffffffff) 854 std::swap(LHS, RHS); 855 856 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 857 if (C->getValue() == 0xffffffff) { 858 Addr.setShift(0); 859 Addr.setExtendType(AArch64_AM::LSL); 860 Addr.setExtendType(AArch64_AM::UXTW); 861 862 unsigned Reg = getRegForValue(LHS); 863 if (!Reg) 864 
return false; 865 bool RegIsKill = hasTrivialKill(LHS); 866 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 867 AArch64::sub_32); 868 Addr.setOffsetReg(Reg); 869 return true; 870 } 871 break; 872 } 873 case Instruction::SExt: 874 case Instruction::ZExt: { 875 if (!Addr.getReg() || Addr.getOffsetReg()) 876 break; 877 878 const Value *Src = nullptr; 879 // Fold the zext or sext when it won't become a noop. 880 if (const auto *ZE = dyn_cast<ZExtInst>(U)) { 881 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 882 Addr.setExtendType(AArch64_AM::UXTW); 883 Src = ZE->getOperand(0); 884 } 885 } else if (const auto *SE = dyn_cast<SExtInst>(U)) { 886 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 887 Addr.setExtendType(AArch64_AM::SXTW); 888 Src = SE->getOperand(0); 889 } 890 } 891 892 if (!Src) 893 break; 894 895 Addr.setShift(0); 896 unsigned Reg = getRegForValue(Src); 897 if (!Reg) 898 return false; 899 Addr.setOffsetReg(Reg); 900 return true; 901 } 902 } // end switch 903 904 if (Addr.isRegBase() && !Addr.getReg()) { 905 unsigned Reg = getRegForValue(Obj); 906 if (!Reg) 907 return false; 908 Addr.setReg(Reg); 909 return true; 910 } 911 912 if (!Addr.getOffsetReg()) { 913 unsigned Reg = getRegForValue(Obj); 914 if (!Reg) 915 return false; 916 Addr.setOffsetReg(Reg); 917 return true; 918 } 919 920 return false; 921 } 922 923 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 924 const User *U = nullptr; 925 unsigned Opcode = Instruction::UserOp1; 926 bool InMBB = true; 927 928 if (const auto *I = dyn_cast<Instruction>(V)) { 929 Opcode = I->getOpcode(); 930 U = I; 931 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 932 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 933 Opcode = C->getOpcode(); 934 U = C; 935 } 936 937 switch (Opcode) { 938 default: break; 939 case Instruction::BitCast: 940 // Look past bitcasts if its operand is in the same BB. 
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  // Direct call to a global: record it and let the caller emit the proper
  // relocation.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

/// Return true if \p Ty maps to a simple, legal MVT that this fast-isel
/// handles, writing the simple type to \p VT. Rejects pointers under ILP32,
/// non-simple types, and f128 (legal but unhandled here).
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value type such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now. (isTypeLegal already wrote the simple VT for
  // us even though it returned false.)
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

/// Return true if \p V can be folded into an instruction selected here:
/// either it is not an instruction at all, or it is defined in the current
/// machine basic block.
bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

/// Rewrite \p Addr into a form that a load/store of \p VT can actually
/// encode, emitting extra ADD/LSL instructions where needed. Returns false
/// when the address cannot be simplified (e.g. on ILP32 targets).
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  // A negative or unaligned offset must fit the 9-bit signed (unscaled) form;
  // otherwise the scaled offset must fit 12 unsigned bits.
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    // ADDXri with a frame index and zero immediate materializes the frame
    // address into a plain GPR base.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
      .addFrameIndex(Addr.getFI())
      .addImm(0)
      .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      // Combine base + (extended/shifted) offset into a single register with
      // an explicit add; pick the extended-register form for UXTW/SXTW,
      // otherwise the shifted-register (LSL) form.
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      // No base register: lower the extend+shift of the offset register into
      // an explicit LSL (with zero- or sign-extension from i32 as recorded).
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    // The combined value becomes the new base; clear the offset-register
    // state it replaced.
    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

/// Append the memory operands of \p Addr (already simplified) to the
/// load/store being built in \p MIB, scaling the immediate offset by
/// \p ScaleFactor and attaching \p MMO when present.
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    // For a store the first use operand is the stored value, so the address
    // operands start one slot later than for a load.
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      // Register-offset forms take two extra immediates: sign-extend flag and
      // whether the offset is shifted by the access size.
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

/// Emit an add/sub of \p LHS and \p RHS of type \p RetVT, trying the
/// immediate, extended-register, shifted-register, and plain register forms
/// in turn. Returns the result register, or 0 on failure. \p SetFlags selects
/// the flag-setting (S) variants; with !\p WantResult the result is discarded
/// into the zero register (compare-style use).
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  // Narrow types are computed in (at least) 32-bit registers; remember the
  // source width for the extends below.
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first. (Only legal for the
  // commutative add.)
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  // First choice: fold a constant RHS into the immediate form. A negative
  // immediate is handled by flipping add<->sub with the negated value.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    // An extended-register operand may also carry a left shift of 0..3.
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // mul x, 2^n becomes the shifted-register form (x, LSL #n).
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  // Fallback: plain register-register form.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}

/// Emit the register-register add/sub form (ADD/SUB/ADDS/SUBS Wrr/Xrr).
/// Returns 0 when an operand is SP/WSP (not encodable in this form) or the
/// type is not i32/i64.
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Indexed as [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr, AArch64::SUBXrr },
      { AArch64::ADDWrr, AArch64::ADDXrr } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    // Result unused (compare-style): write to the zero register.
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}

/// Emit the immediate add/sub form. \p Imm must fit in 12 bits, either
/// directly or shifted left by 12; otherwise returns 0.
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    // Encodable as imm12 with LSL #12.
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  // Indexed as [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri, AArch64::SUBXri },
      { AArch64::ADDWri, AArch64::ADDXri } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  // The non-flag-setting immediate forms accept SP as destination.
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

/// Emit the shifted-register add/sub form (operand2 = RHS, <shift> #imm).
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  // Indexed as [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs, AArch64::SUBXrs },
      { AArch64::ADDWrs, AArch64::ADDXrs } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

/// Emit the extended-register add/sub form (operand2 = RHS, <extend> #imm).
/// The left shift carried by the extend is limited to 0..3.
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  // Indexed as [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx, AArch64::SUBXrx },
      { AArch64::ADDWrx, AArch64::ADDXrx } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  // The non-flag-setting extended forms accept SP as destination.
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

/// Emit a compare of \p LHS and \p RHS, dispatching to the integer or
/// floating-point comparison based on the operand type.
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

/// Integer compare: a flag-setting subtract whose result is discarded.
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

/// Integer compare against an immediate: flag-setting SUBri, result discarded.
bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

/// Emit a floating-point compare (FCMP), using the compare-with-zero form
/// when the RHS is a positive floating-point zero.
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  // Only +0.0 is encodable as the FCMP immediate form.
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    // FCMP{S,D}ri compares against #0.0 and takes only one register operand.
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}

/// Convenience wrapper around emitAddSub with UseAdd=true.
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
                                      int64_t Imm) {
  unsigned ResultReg;
  // A negative immediate becomes a subtract of its magnitude.
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  if (ResultReg)
    return ResultReg;

  // Immediate not encodable: materialize it into a register and add.
  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
  return ResultReg;
}

/// Convenience wrapper around emitAddSub with UseAdd=false.
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// Flag-setting register-register subtract (SUBS).
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);
}

/// Flag-setting shifted-register subtract (SUBS with shifted operand).
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
                       WantResult);
}

/// Emit a logical operation (ISD::AND/OR/XOR) of \p LHS and \p RHS, trying
/// the immediate and shifted-register forms before falling back to a plain
/// register-register instruction.
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  // First choice: fold a constant RHS as a logical immediate.
  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      // mul x, 2^n becomes the shifted-register form (x, LSL #n).
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  // Fallback: plain register-register form, computed in at least 32 bits.
  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  // Narrow results are masked back down to their width.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

/// Emit AND/ORR/EOR with a logical immediate. Returns 0 when \p Imm is not a
/// valid logical immediate for the register size.
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Rows indexed by ISDOpc - ISD::AND; columns by 32/64 bit.
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  unsigned ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  // Mask narrow results, except for AND where the immediate already bounds
  // the value.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

/// Emit AND/ORR/EOR with a shifted-register operand (RHS, LSL #ShiftImm).
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Rows indexed by ISDOpc - ISD::AND; columns by 32/64 bit.
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = OpcTable[ISDOpc - ISD::AND][0];
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64RegClass;
    break;
  }
  unsigned ResultReg =
      fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
  // Mask narrow results back down to their width.
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

/// Convenience wrapper: AND with a logical immediate.
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                     uint64_t Imm) {
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
}

/// Emit a load of \p VT (extended to \p RetVT) from \p Addr, selecting among
/// unscaled (LDUR*), scaled (LDR*ui), and register-offset (LDR*ro[XW])
/// encodings. Returns the loaded register, or 0 on failure.
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return 0;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Rows alternate 32-bit/64-bit destination for each addressing form:
  // unscaled, scaled, register-offset X, register-offset W (extended).
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
        AArch64::LDURXi },
      { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
        AArch64::LDRXui },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // FP loads: unscaled, scaled, register-offset X, register-offset W.
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi, AArch64::LDURDi },
    { AArch64::LDRSui, AArch64::LDRDui },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  // Idx selects the addressing form; a UXTW/SXTW extend bumps it from the
  // X-register-offset row to the W-register-offset row.
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  unsigned ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}

/// Select a scalar Add/Sub instruction; vector operations are delegated back
/// to the generic operator selection.
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// Select a scalar And/Or/Xor instruction; vector operations are delegated
/// back to the generic operator selection.
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectLoad(const Instruction *I) {
// selectLoad (continued).
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction.
  // Only a single (one-use) extend user can be folded.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will remove
    // it when it selects the integer extend.
    unsigned Reg = lookUpRegForValue(IntExtVal);
    auto *MI = MRI.getUniqueVRegDef(Reg);
    if (!MI) {
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
          MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
          ResultReg = std::prev(I)->getOperand(0).getReg();
          removeDeadCode(I, std::next(I));
        } else
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 /*IsKill=*/true,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the instructions
    // that have been emitted by the integer extend lowering code and use the
    // result from the load instruction directly.
    while (MI) {
      Reg = 0;
      // Find the first register use of this instruction; it becomes the next
      // candidate definition to delete.
      for (auto &Opnd : MI->uses()) {
        if (Opnd.isReg()) {
          Reg = Opnd.getReg();
          break;
        }
      }
      MachineBasicBlock::iterator I(MI);
      removeDeadCode(I, std::next(I));
      MI = nullptr;
      if (Reg)
        MI = MRI.getUniqueVRegDef(Reg);
    }
    updateValueMap(IntExtVal, ResultReg);
    return true;
  }

  updateValueMap(I, ResultReg);
  return true;
}

/// Emit a store-release (STLR*) of \p SrcReg to the address in \p AddrReg.
/// Used for atomic stores with release or stronger ordering; only supports a
/// plain base-register addressing mode.
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
                                       unsigned AddrReg,
                                       MachineMemOperand *MMO) {
  unsigned Opc;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:  Opc = AArch64::STLRB; break;
  case MVT::i16: Opc = AArch64::STLRH; break;
  case MVT::i32: Opc = AArch64::STLRW; break;
  case MVT::i64: Opc = AArch64::STLRX; break;
  }

  const MCInstrDesc &II = TII.get(Opc);
  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(SrcReg)
      .addReg(AddrReg)
      .addMemOperand(MMO);
  return true;
}

/// Emit a (non-atomic) store of \p SrcReg of type \p VT to \p Addr. Returns
/// false if the store cannot be handled by FastISel.
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return false;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
// emitStore (continued): pick the opcode variant and emit the store.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Opcode table indexed by [addressing-mode Idx][size class]; rows are
  // unscaled-imm, scaled-imm, reg-offset(X), reg-offset(W).
  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
      AArch64::STURSi, AArch64::STURDi },
    { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
      AArch64::STRSui, AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX, AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW, AArch64::STRDroW }
  };

  unsigned Opc;
  bool VTIsi1 = false;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  // W-register offsets (UXTW/SXTW extends) use the "roW" table entries.
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
  }

  // Storing an i1 requires special handling.
  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
  const MCInstrDesc &II = TII.get(Opc);
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);

  return true;
}

/// Select an IR store. Uses WZR/XZR directly for constant-zero values and
/// emits STLR for atomic stores with release-or-stronger ordering.
bool AArch64FastISel::selectStore(const Instruction *I) {
  MVT VT;
  const Value *Op0 = I->getOperand(0);
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Get the value to be stored into a register. Use the zero register directly
  // when possible to avoid an unnecessary copy and a wasted register.
  unsigned SrcReg = 0;
  if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
    if (CI->isZero())
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
    // Positive floating-point zero has an all-zero bit pattern, so the
    // integer zero register can be stored instead.
    if (CF->isZero() && !CF->isNegative()) {
      VT = MVT::getIntegerVT(VT.getSizeInBits());
      SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    }
  }

  if (!SrcReg)
    SrcReg = getRegForValue(Op0);

  if (!SrcReg)
    return false;

  auto *SI = cast<StoreInst>(I);

  // Try to emit a STLR for seq_cst/release.
  if (SI->isAtomic()) {
    AtomicOrdering Ord = SI->getOrdering();
    // The non-atomic instructions are sufficient for relaxed stores.
    if (isReleaseOrStronger(Ord)) {
      // The STLR addressing mode only supports a base reg; pass that directly.
      unsigned AddrReg = getRegForValue(PtrV);
      return emitStoreRelease(VT, SrcReg, AddrReg,
                              createMachineMemOperandFor(I));
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(PtrV, Addr, Op0->getType()))
    return false;

  if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
    return false;
  return true;
}

/// Map an IR integer/FP comparison predicate to the corresponding AArch64
/// condition code. AL is returned for predicates that cannot be expressed
/// with a single condition (FCMP_ONE/FCMP_UEQ) and acts as "unsupported".
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    // AL is our "false" for now. The other two need more compares.
// getCompareCC (continued): straight predicate -> condition-code mapping.
    return AArch64CC::AL;
  case CmpInst::ICMP_EQ:
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_UGT:
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::ICMP_ULE:
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::ICMP_SLT:
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::FCMP_UNE:
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  }
}

/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  if (FuncInfo.MF->getFunction().hasFnAttribute(
          Attribute::SpeculativeLoadHardening))
    return false;

  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  MVT VT;
  if (!isTypeSupported(LHS->getType(), VT))
    return false;

  unsigned BW = VT.getSizeInBits();
  if (BW > 64)
    return false;

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Try to take advantage of fallthrough opportunities.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Predicate = CmpInst::getInversePredicate(Predicate);
  }

  // TestBit != -1 selects a bit-test branch (TBZ/TBNZ); otherwise a
  // compare-with-zero branch (CBZ/CBNZ) is used.
  int TestBit = -1;
  bool IsCmpNE;
  switch (Predicate) {
  default:
    return false;
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_NE:
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
      std::swap(LHS, RHS);

    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    // Fold "(X & pow2) ==/!= 0" into a single-bit test.
    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        const Value *AndLHS = AI->getOperand(0);
        const Value *AndRHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
            std::swap(AndLHS, AndRHS);

        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();
            LHS = AndLHS;
          }
      }

    if (VT == MVT::i1)
      TestBit = 0;

    IsCmpNE = Predicate == CmpInst::ICMP_NE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // "X < 0" / "X >= 0" test the sign bit.
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    break;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SLE:
    // "X > -1" / "X <= -1" also test the sign bit.
    if (!isa<ConstantInt>(RHS))
      return false;

    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
      return false;

    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    break;
  } // end switch

  // Indexed by [IsBitTest][IsCmpNE][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { {AArch64::CBZW,  AArch64::CBZX },
      {AArch64::CBNZW, AArch64::CBNZX} },
    { {AArch64::TBZW,  AArch64::TBZX },
      {AArch64::TBNZW, AArch64::TBNZX} }
  };

  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  // A bit in the low 32 bits can be tested with the W-form even on a 64-bit
  // value.
  if (TestBit < 32 && TestBit >= 0)
    Is64Bit = false;

  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
  const MCInstrDesc &II = TII.get(Opc);

  unsigned SrcReg = getRegForValue(LHS);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(LHS);

  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                        AArch64::sub_32);

  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);

  // Emit the combined compare and branch instruction.
// emitCompareAndBranch (continued): emit the CB(N)Z/TB(N)Z itself.
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
          .addReg(SrcReg, getKillRegState(SrcIsKill));
  if (IsBitTest)
    MIB.addImm(TestBit);
  MIB.addMBB(TBB);

  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}

/// Select an IR branch instruction: unconditional branches, compare-based
/// conditional branches (possibly fused into CB(N)Z/TB(N)Z), constant
/// conditions, XALU-intrinsic flag reuse, and a generic TB(N)Z fallback.
bool AArch64FastISel::selectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  if (BI->isUnconditional()) {
    MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
    fastEmitBranch(MSucc, BI->getDebugLoc());
    return true;
  }

  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && isValueAvailable(CI)) {
      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_FALSE:
        fastEmitBranch(FBB, DbgLoc);
        return true;
      case CmpInst::FCMP_TRUE:
        fastEmitBranch(TBB, DbgLoc);
        return true;
      }

      // Try to emit a combined compare-and-branch first.
      if (emitCompareAndBranch(BI))
        return true;

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // Emit the cmp.
      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
      // instruction.
      AArch64CC::CondCode CC = getCompareCC(Predicate);
      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_UEQ:
        ExtraCC = AArch64CC::EQ;
        CC = AArch64CC::VS;
        break;
      case CmpInst::FCMP_ONE:
        ExtraCC = AArch64CC::MI;
        CC = AArch64CC::GT;
        break;
      }
      assert((CC != AArch64CC::AL) && "Unexpected condition code.");

      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
      if (ExtraCC != AArch64CC::AL) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
            .addImm(ExtraCC)
            .addMBB(TBB);
      }

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: emit an unconditional branch to the known target.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
        .addMBB(Target);

    // Obtain the branch probability and add the target to the successor list.
    if (FuncInfo.BPI) {
      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
          BI->getParent(), Target->getBasicBlock());
      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
    } else
      FuncInfo.MBB->addSuccessorWithoutProb(Target);
    return true;
  } else {
    AArch64CC::CondCode CC = AArch64CC::NE;
    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
      // Fake request the condition, otherwise the intrinsic might be completely
      // optimized away.
      unsigned CondReg = getRegForValue(BI->getCondition());
      if (!CondReg)
        return false;

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  }

  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0)
    return false;
  bool CondRegIsKill = hasTrivialKill(BI->getCondition());

  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
  unsigned Opcode = AArch64::TBNZW;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Opcode = AArch64::TBZW;
  }

  const MCInstrDesc &II = TII.get(Opcode);
  unsigned ConstrainedCondReg
    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
      .addImm(0)
      .addMBB(TBB);

  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}

/// Select an IR indirect branch; emits BR and records all possible
/// successors in the CFG.
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
  unsigned AddrReg = getRegForValue(BI->getOperand(0));
  if (AddrReg == 0)
    return false;

  // Emit the indirect branch.
  const MCInstrDesc &II = TII.get(AArch64::BR);
  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);

  // Make sure the CFG is up-to-date.
  for (auto *Succ : BI->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);

  return true;
}

/// Select an IR icmp/fcmp, materializing the boolean result in a GPR32
/// (via CSINC) when it cannot be folded or constant-folded away.
bool AArch64FastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Vectors of i1 are weird: bail out.
  if (CI->getType()->isVectorTy())
    return false;

  // Try to optimize or fold the cmp.
// selectCmp (continued).
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_FALSE:
    // Always-false: copy WZR (0) into the result.
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(AArch64::WZR, getKillRegState(true));
    break;
  case CmpInst::FCMP_TRUE:
    // Always-true: materialize the constant 1.
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    break;
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  // Emit the cmp.
  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  ResultReg = createResultReg(&AArch64::GPR32RegClass);

  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
  // condition codes are inverted, because they are used by CSINC.
  static unsigned CondCodeTable[2][2] = {
    { AArch64CC::NE, AArch64CC::VC },
    { AArch64CC::PL, AArch64CC::LE }
  };
  unsigned *CondCodes = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_UEQ:
    CondCodes = &CondCodeTable[0][0];
    break;
  case CmpInst::FCMP_ONE:
    CondCodes = &CondCodeTable[1][0];
    break;
  }

  if (CondCodes) {
    // Combine two conditions with a pair of chained CSINCs.
    unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            TmpReg1)
        .addReg(AArch64::WZR, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
            ResultReg)
        .addReg(TmpReg1, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[1]);

    updateValueMap(I, ResultReg);
    return true;
  }

  // Now set a register based on the comparison.
  AArch64CC::CondCode CC = getCompareCC(Predicate);
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  // CSINC WZR, WZR, invertedCC yields 1 exactly when CC holds.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true))
      .addReg(AArch64::WZR, getKillRegState(true))
      .addImm(invertedCC);

  updateValueMap(I, ResultReg);
  return true;
}

/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
/// value.
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
  if (!SI->getType()->isIntegerTy(1))
    return false;

  const Value *Src1Val, *Src2Val;
  unsigned Opc = 0;
  bool NeedExtraOp = false;
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
    if (CI->isOne()) {
      // select(c, 1, f) -> c | f
      Src1Val = SI->getCondition();
      Src2Val = SI->getFalseValue();
      Opc = AArch64::ORRWrr;
    } else {
      assert(CI->isZero());
      // select(c, 0, f) -> f & ~c
      Src1Val = SI->getFalseValue();
      Src2Val = SI->getCondition();
      Opc = AArch64::BICWrr;
    }
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
    if (CI->isOne()) {
      // select(c, t, 1) -> ~c | t  (the XOR below inverts c first)
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ORRWrr;
      NeedExtraOp = true;
    } else {
      assert(CI->isZero());
      // select(c, t, 0) -> c & t
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ANDWrr;
    }
  }

  if (!Opc)
    return false;

  unsigned Src1Reg = getRegForValue(Src1Val);
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(Src1Val);

  unsigned Src2Reg = getRegForValue(Src2Val);
  if (!Src2Reg)
    return false;
  bool Src2IsKill = hasTrivialKill(Src2Val);

  if (NeedExtraOp) {
    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
    Src1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
                                       Src1IsKill, Src2Reg, Src2IsKill);
  updateValueMap(SI, ResultReg);
  return true;
}

/// Select an IR select instruction, lowering to CSEL/FCSEL and reusing flags
/// from a one-use compare or an XALU intrinsic when possible.
bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  MVT VT;
  if (!isTypeSupported(I->getType(), VT))
    return false;

  unsigned Opc;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;
    break;
  }

  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
  AArch64CC::CondCode CC = AArch64CC::NE;
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;

  if (optimizeSelect(SI))
    return true;

  // Try to pickup the flags, so we don't have to emit another compare.
  if (foldXALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition to force emission of the XALU intrinsic.
    unsigned CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
             isValueAvailable(Cond)) {
    const auto *Cmp = cast<CmpInst>(Cond);
    // Try to optimize or fold the cmp.
    // Try to fold selects whose FP compare predicate is constant-true or
    // constant-false: the select trivially reduces to one of its operands.
    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    const Value *FoldSelect = nullptr;
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_FALSE:
      FoldSelect = SI->getFalseValue();
      break;
    case CmpInst::FCMP_TRUE:
      FoldSelect = SI->getTrueValue();
      break;
    }

    if (FoldSelect) {
      unsigned SrcReg = getRegForValue(FoldSelect);
      if (!SrcReg)
        return false;
      unsigned UseReg = lookUpRegForValue(SI);
      if (UseReg)
        // Any previously recorded kill flags on the select's old register
        // would now be stale; clear them.
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }

    // Emit the cmp.
    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
      return false;

    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    CC = getCompareCC(Predicate);
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_UEQ:
      ExtraCC = AArch64CC::EQ;
      CC = AArch64CC::VS;
      break;
    case CmpInst::FCMP_ONE:
      ExtraCC = AArch64CC::MI;
      CC = AArch64CC::GT;
      break;
    }
    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  } else {
    // Non-compare condition: materialize it and test its low bit.
    unsigned CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
    bool CondIsKill = hasTrivialKill(Cond);

    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
    CondReg = constrainOperandRegClass(II, CondReg, 1);

    // Emit a TST instruction (ANDS wzr, reg, #imm).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
            AArch64::WZR)
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  }

  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());

  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());

  if (!Src1Reg || !Src2Reg)
    return false;

  // For the two-condition predicates (FCMP_UEQ/FCMP_ONE) first emit a select
  // on ExtraCC whose result feeds the second operand of the final select.
  if (ExtraCC != AArch64CC::AL) {
    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
                               Src2IsKill, ExtraCC);
    Src2IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
                                        Src2IsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
}

/// Select an 'fpext' from float to double using FCVTDSr. All other fpext
/// forms are left to SelectionDAG.
bool AArch64FastISel::selectFPExt(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
          ResultReg).addReg(Op);
  updateValueMap(I, ResultReg);
  return true;
}

/// Select an 'fptrunc' from double to float using FCVTSDr. All other fptrunc
/// forms are left to SelectionDAG.
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
          ResultReg).addReg(Op);
  updateValueMap(I, ResultReg);
  return true;
}

// FPToUI and FPToSI
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
  // f128 and f16 sources are not handled here.
  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
    return false;

  // Pick the FCVTZ[SU] opcode by source size (f64 vs f32), signedness, and
  // destination width (i32 vs i64).
  unsigned Opc;
  if (SrcVT == MVT::f64) {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
  }
  unsigned ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  updateValueMap(I, ResultReg);
  return true;
}

// UIToFP and SIToFP
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;
  // Let regular ISEL handle FP16
  if (DestVT == MVT::f16)
    return false;

  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);

  // Handle sign-extension.
  // Sub-i32 integer sources must first be widened to i32; the extension kind
  // (zero vs sign) follows the signedness of the conversion.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    SrcReg =
        emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    if (!SrcReg)
      return false;
    SrcIsKill = true;
  }

  // Pick the [SU]CVTF opcode by source width (i64 vs i32), signedness, and
  // destination FP size (f32 vs f64).
  unsigned Opc;
  if (SrcVT == MVT::i64) {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
  }

  unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
                                      SrcIsKill);
  updateValueMap(I, ResultReg);
  return true;
}

/// Fast-path lowering of the function's formal arguments. Only handles the
/// simple C/Swift cases where every argument is passed in one of the first
/// eight GPRs or first eight FPRs; anything else bails to SelectionDAG.
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C && CC != CallingConv::Swift)
    return false;

  if (Subtarget->hasCustomCallingConv())
    return false;

  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    // Attributes that change the passing convention are not handled here.
    if (Arg.hasAttribute(Attribute::ByVal) ||
        Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple())
      return false;

    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;

    if (VT.isVector() &&
        (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;

    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
             VT.is128BitVector())
      ++FPRCnt;
    else
      return false;

    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }

  // Argument registers by row: W (32-bit GPR), X (64-bit GPR), and the
  // H/S/D/Q FPR views (16/32/64/128-bit); columns are argument slots 0-7.
  static const MCPhysReg Registers[6][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 },
    { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
      AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };

  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");

    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}

/// Assign the outgoing call arguments to their locations and emit the
/// argument-setup code: CALLSEQ_START, register copies, and stack stores.
/// On success, \p NumBytes is the amount of stack space the call needs.
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
    .addImm(NumBytes).addImm(0);

  // Process the args.
  for (CCValAssign &VA : ArgLocs) {
    const Value *ArgVal = CLI.OutVals[VA.getValNo()];
    MVT ArgVT = OutVTs[VA.getValNo()];

    unsigned ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
      if (!ArgReg)
        return false;
      break;
    }
    case CCValAssign::AExt:
      // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
      CLI.OutRegs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Don't emit stores for undef values.
      if (isa<UndefValue>(ArgVal))
        continue;

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      // On big-endian targets, a small argument lives in the high bytes of
      // its slot, so bias the store offset accordingly.
      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      Align Alignment = DL.getABITypeAlign(ArgVal->getType());
      MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
          MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
          MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);

      if (!emitStore(ArgVT, ArgReg, Addr, MMO))
        return false;
    }
  }
  return true;
}

/// Finish a lowered call: emit CALLSEQ_END and, for non-void calls, copy the
/// single supported return value out of its physical register into a vreg.
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
                                 unsigned NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(0);

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));

    // Only handle a single return value.
    if (RVLocs.size() != 1)
      return false;

    // Copy all of the result registers out of their specified physreg.
    MVT CopyVT = RVLocs[0].getValVT();

    // TODO: Handle big-endian results
    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(RVLocs[0].getLocReg());
    CLI.InRegs.push_back(RVLocs[0].getLocReg());

    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}

/// Fast-path lowering of a call. Bails out (returning false so SelectionDAG
/// takes over) for tail calls, varargs, ILP32, unsupported code models, and
/// any argument/return configuration not handled by the helpers below.
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // FIXME: we could and should support this, but for now correctness at -O0 is
  // more important.
  if (Subtarget->isTargetILP32())
    return false;

  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small-addressing and large code models.
  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    return false;

  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // FIXME: Only handle *simple* calls for now.
  MVT RetVT;
  if (CLI.RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(CLI.RetTy, RetVT))
    return false;

  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
        Flag.isSwiftSelf() || Flag.isSwiftError())
      return false;

  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());

  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) &&
        !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;

    OutVTs.push_back(VT);
  }

  Address Addr;
  if (Callee && !computeCallAddress(Callee, Addr))
    return false;

  // The weak function target may be zero; in that case we must use indirect
  // addressing via a stub on windows as it may be out of range for a
  // PC-relative jump.
  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
      Addr.getGlobalValue()->hasExternalWeakLinkage())
    return false;

  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;

  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  if (RegInfo->isAnyArgRegReserved(*MF))
    RegInfo->emitReservedArgRegCallError(*MF);

  // Issue the call.
  MachineInstrBuilder MIB;
  if (Subtarget->useSmallAddressing()) {
    // Direct BL when the target is a symbol/global, BLR through a register
    // otherwise.
    const MCInstrDesc &II =
        TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
    if (Symbol)
      MIB.addSym(Symbol, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg()) {
      unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
      MIB.addReg(Reg);
    } else
      return false;
  } else {
    // Large code model: materialize the callee address into a register and
    // call through it.
    unsigned CallReg = 0;
    if (Symbol) {
      // Load the callee's address from the GOT: ADRP of the GOT page plus an
      // LDR of the page-offset slot (MO_GOT flags below).
      unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
              ADRPReg)
          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);

      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::LDRXui), CallReg)
          .addReg(ADRPReg)
          .addSym(Symbol,
                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();

    if (!CallReg)
      return false;

    const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
    CallReg = constrainOperandRegClass(II, CallReg, 0);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
  }

  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
3320 return finishCall(CLI, RetVT, NumBytes); 3321 } 3322 3323 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3324 if (Alignment) 3325 return Len / Alignment <= 4; 3326 else 3327 return Len < 32; 3328 } 3329 3330 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3331 uint64_t Len, unsigned Alignment) { 3332 // Make sure we don't bloat code by inlining very large memcpy's. 3333 if (!isMemCpySmall(Len, Alignment)) 3334 return false; 3335 3336 int64_t UnscaledOffset = 0; 3337 Address OrigDest = Dest; 3338 Address OrigSrc = Src; 3339 3340 while (Len) { 3341 MVT VT; 3342 if (!Alignment || Alignment >= 8) { 3343 if (Len >= 8) 3344 VT = MVT::i64; 3345 else if (Len >= 4) 3346 VT = MVT::i32; 3347 else if (Len >= 2) 3348 VT = MVT::i16; 3349 else { 3350 VT = MVT::i8; 3351 } 3352 } else { 3353 // Bound based on alignment. 3354 if (Len >= 4 && Alignment == 4) 3355 VT = MVT::i32; 3356 else if (Len >= 2 && Alignment == 2) 3357 VT = MVT::i16; 3358 else { 3359 VT = MVT::i8; 3360 } 3361 } 3362 3363 unsigned ResultReg = emitLoad(VT, VT, Src); 3364 if (!ResultReg) 3365 return false; 3366 3367 if (!emitStore(VT, ResultReg, Dest)) 3368 return false; 3369 3370 int64_t Size = VT.getSizeInBits() / 8; 3371 Len -= Size; 3372 UnscaledOffset += Size; 3373 3374 // We need to recompute the unscaled offset for each iteration. 3375 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3376 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3377 } 3378 3379 return true; 3380 } 3381 3382 /// Check if it is possible to fold the condition from the XALU intrinsic 3383 /// into the user. The condition code will only be updated on success. 
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  // The condition must be the extracted overflow bit of a *.with.overflow
  // intrinsic: extractvalue of an IntrinsicInst.
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  // The intrinsic returns a {result, overflow} struct; only the value type
  // of the first element matters here.
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);

  // Canonicalize immediate to the RHS.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
    std::swap(LHS, RHS);

  // Simplify multiplies.
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::smul_with_overflow:
    // smul.with.overflow(x, 2) is treated as sadd.with.overflow(x, x).
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::sadd_with_overflow;
    break;
  case Intrinsic::umul_with_overflow:
    // umul.with.overflow(x, 2) is treated as uadd.with.overflow(x, x).
    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 2)
        IID = Intrinsic::uadd_with_overflow;
    break;
  }

  // Map the intrinsic to the condition code that tests its overflow bit.
  AArch64CC::CondCode TmpCC;
  switch (IID) {
  default:
    return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
    TmpCC = AArch64CC::VS;
    break;
  case Intrinsic::uadd_with_overflow:
    TmpCC = AArch64CC::HS;
    break;
  case Intrinsic::usub_with_overflow:
    TmpCC = AArch64CC::LO;
    break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    TmpCC = AArch64CC::NE;
    break;
  }

  // Check if both instructions are in the same basic block.
  if (!isValueAvailable(II))
    return false;

  // Make sure nothing is in the way
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  // Walk backwards from the user to the intrinsic; only extractvalues of
  // this same intrinsic may appear in between. NOTE(review): presumably any
  // other intervening instruction could clobber the flags — confirm.
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  // Only update the caller-visible condition code once folding is known to
  // be possible.
  CC = TmpCC;
  return true;
}

/// Fast-path lowering for the subset of intrinsics handled here; returns
/// false to fall back to SelectionDAG for everything else.
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
  // FIXME: Handle more intrinsics.
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
    // Recursively load frame address
    // ldr x0, [fp]
    // ldr x0, [x0]
    // ldr x0, [x0]
    // ...
    // One LDR per requested depth level walks up the chain of saved frame
    // pointers.
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
                                SrcReg, /*IsKill=*/true, 0);
      assert(DestReg && "Unexpected LDR instruction emission failure.");
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::sponentry: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();

    // SP = FP + Fixed Object + 16
    int FI = MFI.CreateFixedObject(4, 0, false);
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::ADDXri), ResultReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addImm(0);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const auto *MTI = cast<MemTransferInst>(II);
    // Don't handle volatile.
    if (MTI->isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
      unsigned Alignment = MinAlign(MTI->getDestAlignment(),
                                    MTI->getSourceAlignment());
      if (isMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!computeAddress(MTI->getRawDest(), Dest) ||
            !computeAddress(MTI->getRawSource(), Src))
          return false;
        if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // Otherwise fall back to a libcall; the length must already be i64.
    if (!MTI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
    return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);
    // Don't handle volatile.
    if (MSI->isVolatile())
      return false;

    if (!MSI->getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
  }
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow: {
    // Lower these via the corresponding libm libcall.
    MVT RetVT;
    if (!isTypeLegal(II->getType(), RetVT))
      return false;

    if (RetVT != MVT::f32 && RetVT != MVT::f64)
      return false;

    // Rows: sin, cos, pow; columns: f32, f64.
    static const RTLIB::Libcall LibCallTable[3][2] = {
      { RTLIB::SIN_F32, RTLIB::SIN_F64 },
      { RTLIB::COS_F32, RTLIB::COS_F64 },
      { RTLIB::POW_F32, RTLIB::POW_F64 }
    };
    RTLIB::Libcall LC;
    bool Is64Bit = RetVT == MVT::f64;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic.");
    case Intrinsic::sin:
      LC = LibCallTable[0][Is64Bit];
      break;
    case Intrinsic::cos:
      LC = LibCallTable[1][Is64Bit];
      break;
    case Intrinsic::pow:
      LC = LibCallTable[2][Is64Bit];
      break;
    }

    ArgListTy Args;
    Args.reserve(II->getNumArgOperands());

    // Populate the argument list.
    for (auto &Arg : II->arg_operands()) {
      ArgListEntry Entry;
      Entry.Val = Arg;
      Entry.Ty = Arg->getType();
      Args.push_back(Entry);
    }

    CallLoweringInfo CLI;
    MCContext &Ctx = MF->getContext();
    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
                  TLI.getLibcallName(LC), std::move(Args));
    if (!lowerCallTo(CLI))
      return false;
    updateValueMap(II, CLI.ResultReg);
    return true;
  }
  case Intrinsic::fabs: {
    MVT VT;
    if (!isTypeLegal(II->getType(), VT))
      return false;

    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      return false;
    case MVT::f32:
      Opc = AArch64::FABSSr;
      break;
    case MVT::f64:
      Opc = AArch64::FABSDr;
      break;
    }
    unsigned SrcReg = getRegForValue(II->getOperand(0));
    if (!SrcReg)
      return false;
    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
        .addReg(SrcReg, getKillRegState(SrcRegIsKill));
    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::trap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  case Intrinsic::debugtrap:
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(0xF000);
    return true;

  case Intrinsic::sqrt: {
    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Op0Reg = getRegForValue(II->getOperand(0));
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(II->getOperand(0));

    unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
    if (!ResultReg)
      return false;

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT != MVT::i32 && VT != MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);
    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
      std::swap(LHS, RHS);

    // Simplify multiplies.
    Intrinsic::ID IID = II->getIntrinsicID();
    switch (IID) {
    default:
      break;
    case Intrinsic::smul_with_overflow:
      // smul.with.overflow(x, 2) -> sadd.with.overflow(x, x)
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::sadd_with_overflow;
          RHS = LHS;
        }
      break;
    case Intrinsic::umul_with_overflow:
      // umul.with.overflow(x, 2) -> uadd.with.overflow(x, x)
      if (const auto *C = dyn_cast<ConstantInt>(RHS))
        if (C->getValue() == 2) {
          IID = Intrinsic::uadd_with_overflow;
          RHS = LHS;
        }
      break;
    }

    unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
    AArch64CC::CondCode CC = AArch64CC::Invalid;
    switch (IID) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::uadd_with_overflow:
      ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::HS;
      break;
    case Intrinsic::ssub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::VS;
      break;
    case Intrinsic::usub_with_overflow:
      ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
      CC = AArch64CC::LO;
      break;
    case Intrinsic::smul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
        // 32-bit: widen to a 64-bit SMULL, then compare the high 32 bits
        // (LSR #32) against the sign-extension of the low half (ASR #31);
        // a mismatch sets NE.
        MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
                                       /*IsKill=*/false, 32);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
        ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
                                              AArch64::sub_32);
        emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 31, /*WantResult=*/false);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
                            /*IsKill=*/false);
        unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
                    AArch64_AM::ASR, 63, /*WantResult=*/false);
      }
      break;
    }
    case Intrinsic::umul_with_overflow: {
      CC = AArch64CC::NE;
      unsigned LHSReg = getRegForValue(LHS);
      if (!LHSReg)
        return false;
      bool LHSIsKill = hasTrivialKill(LHS);

      unsigned RHSReg = getRegForValue(RHS);
      if (!RHSReg)
        return false;
      bool RHSIsKill = hasTrivialKill(RHS);

      if (VT == MVT::i32) {
        // 32-bit: widen to a 64-bit UMULL; any set bit in the high 32 bits
        // (tested via LSR #32 against XZR) means overflow.
        MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
        emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
                    /*IsKill=*/false, AArch64_AM::LSR, 32,
                    /*WantResult=*/false);
        MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
                                            AArch64::sub_32);
      } else {
        assert(VT == MVT::i64 && "Unexpected value type.");
        // LHSReg and RHSReg cannot be killed by this Mul, since they are
        // reused in the next instruction.
        MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
                            /*IsKill=*/false);
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
                                        RHSReg, RHSIsKill);
        emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
                    /*IsKill=*/false, /*WantResult=*/false);
      }
      break;
    }
    }

    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    if (!ResultReg1)
      return false;

    // Materialize the overflow bit with CSINC wzr, wzr on the inverted
    // condition; the two results must occupy consecutive registers for
    // updateValueMap's multi-register form.
    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
                                  /*IsKill=*/true, getInvertedCondCode(CC));
    (void)ResultReg2;
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  }
  return false;
}

/// Select a 'ret' instruction, handling at most one register return value.
bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ?
                            RetCC_AArch64_WebKit_JS
                            : RetCC_AArch64_AAPCS;
    CCInfo.AnalyzeReturn(Outs, RetCC);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    Register DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // "Callee" (i.e. value producer) zero extends pointers at function
    // boundary.
    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
      SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff);

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}

// Lower 'trunc' between scalar integer types (source i8..i64, destination
// i1..i32).
bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(Op);

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generate a COPY. We cannot mark the source register also as result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg, getKillRegState(SrcIsKill));
  }

  updateValueMap(I, ResultReg);
  return true;
}

// Extend an i1 value to i8/i16/i32/i64. Zero-extension is an AND with 1
// (plus SUBREG_TO_REG for i64 destinations); sign-extension uses SBFM.
// Returns 0 (failure) for SExt i1 -> i64, which is not implemented here.
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (IsZExt) {
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    // SBFM Wd, Wn, #0, #0 replicates bit 0 across the register.
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            /*TODO:IsKill=*/false, 0, 0);
  }
}

// Emit an integer multiply as MADD Rd, Rn, Rm, {W|X}ZR (mul == madd with a
// zero addend). i8/i16 are computed in 32 bits.
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill) {
  unsigned Opc, ZReg;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
  case MVT::i64:
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // NOTE(review): the /*IsKill=*/ comment is attached to ZReg here, but the
  // boolean it documents is the trailing 'true' argument (compare
  // emitSMULL_rr below, where it annotates the bool).
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          /*IsKill=*/ZReg, true);
}

// Emit a signed 32x32->64 multiply via SMADDL Xd, Wn, Wm, XZR.
unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op0IsKill, Op1, Op1IsKill,
                          AArch64::XZR, /*IsKill=*/true);
}

// Emit an unsigned 32x32->64 multiply via UMADDL Xd, Wn, Wm, XZR.
unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op0IsKill, Op1, Op1IsKill,
                          AArch64::XZR, /*IsKill=*/true);
}

// Emit a variable (register) logical shift left. i8/i16 are shifted as i32
// with the shift amount and the result masked to the narrow width.
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSLVWr; break;
  case MVT::i64: Opc = AArch64::LSLVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    // Clamp the shift amount to the narrow type's value range.
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    // Re-truncate the result to the narrow width.
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

// Emit an immediate shift left, folding a pending zero-/sign-extension of
// the source (SrcVT -> RetVT) into a single {S|U}BFM where possible.
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // The 64-bit bitfield-move needs a 64-bit operand; widen the 32-bit
    // value with SUBREG_TO_REG first.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}

// Emit a variable (register) logical shift right. i8/i16 sources have their
// high bits and the shift amount masked first, and the result is
// re-truncated.
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSRVWr; break;
  case MVT::i64: Opc = AArch64::LSRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ?
      &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    // Zero the high bits of the narrow source (so the logical shift pulls
    // in zeros) and clamp the shift amount.
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

// Emit an immediate logical shift right, folding a pending zero-extension
// of the source into a single {S|U}BFM where possible. A pending
// sign-extension cannot be folded and is emitted explicitly first.
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting out all bits of a zero-extended value always yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    Op0IsKill = true;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // Widen a 32-bit operand for the 64-bit bitfield-move.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}

// Emit a variable (register) arithmetic shift right. i8/i16 sources are
// sign-extended to i32 first; the shift amount and result are masked.
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::ASRVWr; break;
  case MVT::i64: Opc = AArch64::ASRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ?
      &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    // ASR needs the sign bit in place: sign-extend the narrow source to i32
    // and clamp the shift amount to the narrow width.
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

// Emit an immediate arithmetic shift right, folding a pending
// zero-/sign-extension of the source into a single {S|U}BFM.
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting out all bits of a zero-extended value always yields zero.
  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    // Widen a 32-bit operand for the 64-bit bitfield-move.
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}

// Emit a zero-/sign-extension from SrcVT (i1/i8/i16/i32) to DestVT
// (i8/i16/i32/i64) using AND (i1 zext) or SBFM/UBFM.
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not
  // have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    // The 64-bit bitfield-move needs a 64-bit input register.
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
}

// Return true if LI is one of the zero-extending load opcodes (byte,
// half-word, or word loads writing a W register).
static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  }
}

// Return true if LI is one of the sign-extending load opcodes.
static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  }
}

// Fold the extend I of a single-use load into the already-selected load when
// the emitted load instruction itself produces the extended value.
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  unsigned Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  // Look through a COPY of sub_32 to find the defining load.
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    // Use the 64-bit load result directly and delete the now-dead COPY.
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}

// Lower zext/sext. First tries to fold the extend into a preceding load or
// to reuse an already sign-/zero-extended function argument; otherwise emits
// the extension explicitly via emitIntExt.
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg, getKillRegState(SrcIsKill))
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }
      // Conservatively clear all kill flags from all uses, because we are
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
      // level. The result of the instruction at IR level might have been
      // trivially dead, which is now not longer true.
      unsigned UseReg = lookUpRegForValue(I);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

// Lower srem/urem as DIV followed by MSUB (rem = n - (n / d) * d); AArch64
// has no integer remainder instruction.
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ?
      AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // Neither source may be killed by the DIV: both are reused by the MSUB.
  unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
                                     Src1Reg, /*IsKill=*/false);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // The remainder is computed as numerator - (quotient * denominator) using the
  // MSUB instruction.
  unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
                                        Src1Reg, Src1IsKill, Src0Reg,
                                        Src0IsKill);
  updateValueMap(I, ResultReg);
  return true;
}

// Lower 'mul'. Multiplies by a power-of-two constant are turned into
// immediate shifts (folding a preceding non-free extend); everything else
// goes through emitMul_rr, or selectBinaryOp for vectors.
bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  // Canonicalize a power-of-two constant onto the RHS.
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      unsigned Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;
      bool Src0IsKill = hasTrivialKill(Src0);

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

// Lower shl/lshr/ashr. Constant shift amounts use the immediate forms
// (folding a preceding non-free extend into the bitfield move); variable
// amounts use the register forms.
bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    // AShr requires a sign-extended source; Shl/LShr use zero-extension.
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    // Look through a non-free zero/sign-extend of the shifted value so the
    // extend can be folded into the immediate-shift emission.
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    unsigned Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;
    bool Op0IsKill = hasTrivialKill(Op0);

    // Emit the appropriate immediate-shift helper for the opcode.
    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  // Shift by a register amount.
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

// Lower a bitcast between same-sized integer and floating-point values by
// emitting the corresponding FMOV between a GPR and an FPR.
bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Only the four i32<->f32 and i64<->f64 cross-bank moves are handled here.
  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  // Pick the destination register class matching the result type.
  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
  unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

// Lower frem via a runtime library call, since there is no direct
// floating-point remainder instruction to select.
bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Only f32/f64 have corresponding libcalls here.
  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  // Lower the libcall like any other call.
  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

// Lower sdiv by an i32/i64 (positive or negative) power-of-2 constant using
// shifts; anything else is deferred to the generic binary-op selection.
bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  // An exact sdiv cannot round, so a plain arithmetic shift suffices.
  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  // Otherwise bias the dividend by Pow2 - 1 so the subsequent arithmetic
  // shift rounds toward zero for negative values.
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // Select the bias to apply: (Src0 < 0) ? Pow2 - 1 : 0.
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  unsigned IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return std::pair<unsigned, bool>(0, false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    // GEP indices are sign-extended to pointer width.
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      // Struct member: accumulate the layout-computed field offset.
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      Type *Ty = GTI.getIndexedType();

      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs +=
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      // Flush any pending constant offset before handling a variable index.
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
        if (!N)
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
      unsigned IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
        if (!IdxN)
          return false;
        IdxNIsKill = true;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N)
        return false;
    }
  }
  // Emit any remaining accumulated constant offset.
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

// Lower cmpxchg at -O0 via the CMP_SWAP pseudo-instruction, producing the
// {oldval, i1 success} pair that extractvalue selection expects.
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOpt::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  // Constrain the pointer, expected, and new-value operands to the register
  // classes required by the pseudo's instruction description.
  const unsigned AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const unsigned DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const unsigned NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const unsigned ResultReg1 = createResultReg(ResRC);
  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  // Compare the loaded value against the expected value, discarding the
  // arithmetic result into the zero register and keeping only the flags.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ?
                  AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  // Materialize the i1 success flag from the comparison's condition flags.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  // updateValueMap with NumRegs=2 relies on the two results being adjacent.
  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

// Top-level FastISel hook: dispatch each IR instruction to its AArch64
// specific selector, falling back to target-independent selection at the end.
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    // Try the target-independent selection first; fall back to ours.
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case
      Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // fall-back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

namespace llvm {

// Factory entry point used by the AArch64 target to create this FastISel.
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}

} // end namespace llvm