//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    AArch64_AM::ShiftExtendType ExtType;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg;
    unsigned Shift;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget =
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"

/// \brief Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// \brief Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0;    // invalid
  case MVT::i1:  // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ?
      AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm =
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    assert((Imm != -1) && "Cannot encode floating-point constant.");
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the MachO large code model materialize the FP constant in code.
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    unsigned TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
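  // (Both sequences emitted below are two instructions: an ADRP to form the
  // 4KiB page address of the symbol, then either an LDR from the GOT slot or
  // an ADD of the low 12 bits of the address.)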
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC);
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}

/// \brief Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
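    // (Static allocas are the one safe cross-block case: they are lowered to
    // frame indices below rather than to virtual registers.)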
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
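    // For example (illustrative), "sub i64 %x, 16" folds into the running
    // offset and yields the address [%x, #-16].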
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                             AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
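        // Illustrative example: for "mul (zext i32 %i to i64), 8" used as an
        // address, the extend folds into the addressing mode so the final
        // access can use the form [Xbase, Wi, UXTW #3].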
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if the operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if the operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if the operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// \brief Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
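  // (The rewrite below turns the frame index into a plain GPR64 base via an
  // "ADDXri FI, 0, 0", after which the reg+reg / reg+imm lowering applies.)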
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ?
        1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
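  // An illustrative target pattern: "add w0, w1, w2, sxth" sign-extends the
  // i16 RHS and adds it in one instruction (the _rx extended-register form).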
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
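  // e.g. "%s = shl i32 %b, 2" feeding an add can fold to the shifted-register
  // form "add w0, w_a, w_b, lsl #2" (illustrative register names).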
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ?
        AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// \brief This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
                                      int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
                       WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
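  // As with add/sub above, a left shift of the RHS can fold into the
  // shifted-register form, e.g. "and w0, w_a, w_b, lsl #3" (illustrative).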
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  unsigned ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
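  // (A shift amount of register width or more comes from an IR shift with an
  // undefined result, so it is not worth folding; bail out instead.)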
1666 if (ShiftImm >= RetVT.getSizeInBits()) 1667 return 0; 1668 1669 const TargetRegisterClass *RC; 1670 unsigned Opc; 1671 switch (RetVT.SimpleTy) { 1672 default: 1673 return 0; 1674 case MVT::i1: 1675 case MVT::i8: 1676 case MVT::i16: 1677 case MVT::i32: 1678 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1679 RC = &AArch64::GPR32RegClass; 1680 break; 1681 case MVT::i64: 1682 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1683 RC = &AArch64::GPR64RegClass; 1684 break; 1685 } 1686 unsigned ResultReg = 1687 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1688 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1689 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1690 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1691 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1692 } 1693 return ResultReg; 1694 } 1695 1696 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1697 uint64_t Imm) { 1698 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1699 } 1700 1701 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1702 bool WantZExt, MachineMemOperand *MMO) { 1703 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1704 return 0; 1705 1706 // Simplify this down to something we can handle. 1707 if (!simplifyAddress(Addr, VT)) 1708 return 0; 1709 1710 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1711 if (!ScaleFactor) 1712 llvm_unreachable("Unexpected value type."); 1713 1714 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1715 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1716 bool UseScaled = true; 1717 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1718 UseScaled = false; 1719 ScaleFactor = 1; 1720 } 1721 1722 static const unsigned GPOpcTable[2][8][4] = { 1723 // Sign-extend. 1724 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1725 AArch64::LDURXi }, 1726 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1727 AArch64::LDURXi }, 1728 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1729 AArch64::LDRXui }, 1730 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1731 AArch64::LDRXui }, 1732 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1733 AArch64::LDRXroX }, 1734 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1735 AArch64::LDRXroX }, 1736 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1737 AArch64::LDRXroW }, 1738 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1739 AArch64::LDRXroW } 1740 }, 1741 // Zero-extend. 
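    // Same layout as the sign-extend half above: for each addressing mode
    // (unscaled immediate, scaled immediate, reg+reg shifted, reg+reg
    // extended) there is a 32-bit-result row followed by a 64-bit-result row;
    // the columns are the i8, i16, i32, and i64 access sizes.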
1742 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1743 AArch64::LDURXi }, 1744 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1745 AArch64::LDURXi }, 1746 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1747 AArch64::LDRXui }, 1748 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1749 AArch64::LDRXui }, 1750 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1751 AArch64::LDRXroX }, 1752 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1753 AArch64::LDRXroX }, 1754 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1755 AArch64::LDRXroW }, 1756 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1757 AArch64::LDRXroW } 1758 } 1759 }; 1760 1761 static const unsigned FPOpcTable[4][2] = { 1762 { AArch64::LDURSi, AArch64::LDURDi }, 1763 { AArch64::LDRSui, AArch64::LDRDui }, 1764 { AArch64::LDRSroX, AArch64::LDRDroX }, 1765 { AArch64::LDRSroW, AArch64::LDRDroW } 1766 }; 1767 1768 unsigned Opc; 1769 const TargetRegisterClass *RC; 1770 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1771 Addr.getOffsetReg(); 1772 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1773 if (Addr.getExtendType() == AArch64_AM::UXTW || 1774 Addr.getExtendType() == AArch64_AM::SXTW) 1775 Idx++; 1776 1777 bool IsRet64Bit = RetVT == MVT::i64; 1778 switch (VT.SimpleTy) { 1779 default: 1780 llvm_unreachable("Unexpected value type."); 1781 case MVT::i1: // Intentional fall-through. 1782 case MVT::i8: 1783 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1784 RC = (IsRet64Bit && !WantZExt) ? 1785 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1786 break; 1787 case MVT::i16: 1788 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1789 RC = (IsRet64Bit && !WantZExt) ? 1790 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1791 break; 1792 case MVT::i32: 1793 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1794 RC = (IsRet64Bit && !WantZExt) ? 1795 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1796 break; 1797 case MVT::i64: 1798 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1799 RC = &AArch64::GPR64RegClass; 1800 break; 1801 case MVT::f32: 1802 Opc = FPOpcTable[Idx][0]; 1803 RC = &AArch64::FPR32RegClass; 1804 break; 1805 case MVT::f64: 1806 Opc = FPOpcTable[Idx][1]; 1807 RC = &AArch64::FPR64RegClass; 1808 break; 1809 } 1810 1811 // Create the base instruction, then add the operands. 1812 unsigned ResultReg = createResultReg(RC); 1813 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1814 TII.get(Opc), ResultReg); 1815 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1816 1817 // Loading an i1 requires special handling. 1818 if (VT == MVT::i1) { 1819 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1820 assert(ANDReg && "Unexpected AND instruction emission failure."); 1821 ResultReg = ANDReg; 1822 } 1823 1824 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1825 // the 32bit reg to a 64bit reg. 
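  // SUBREG_TO_REG is sufficient here because any instruction that writes a W
  // register implicitly zeroes bits [63:32] of the corresponding X register,
  // so the 32-bit load has already produced the zero-extended 64-bit value.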
1826 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1827 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1828 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1829 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1830 .addImm(0) 1831 .addReg(ResultReg, getKillRegState(true)) 1832 .addImm(AArch64::sub_32); 1833 ResultReg = Reg64; 1834 } 1835 return ResultReg; 1836 } 1837 1838 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1839 MVT VT; 1840 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1841 return false; 1842 1843 if (VT.isVector()) 1844 return selectOperator(I, I->getOpcode()); 1845 1846 unsigned ResultReg; 1847 switch (I->getOpcode()) { 1848 default: 1849 llvm_unreachable("Unexpected instruction."); 1850 case Instruction::Add: 1851 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1852 break; 1853 case Instruction::Sub: 1854 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1855 break; 1856 } 1857 if (!ResultReg) 1858 return false; 1859 1860 updateValueMap(I, ResultReg); 1861 return true; 1862 } 1863 1864 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1865 MVT VT; 1866 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1867 return false; 1868 1869 if (VT.isVector()) 1870 return selectOperator(I, I->getOpcode()); 1871 1872 unsigned ResultReg; 1873 switch (I->getOpcode()) { 1874 default: 1875 llvm_unreachable("Unexpected instruction."); 1876 case Instruction::And: 1877 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1878 break; 1879 case Instruction::Or: 1880 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1881 break; 1882 case Instruction::Xor: 1883 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1884 break; 1885 } 1886 if (!ResultReg) 1887 return false; 1888 1889 updateValueMap(I, ResultReg); 1890 return true; 1891 } 1892 1893 bool AArch64FastISel::selectLoad(const Instruction *I) { 1894 MVT VT; 1895 // Verify we have a legal type before going any further. Currently, we handle 1896 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1897 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1898 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1899 cast<LoadInst>(I)->isAtomic()) 1900 return false; 1901 1902 const Value *SV = I->getOperand(0); 1903 if (TLI.supportSwiftError()) { 1904 // Swifterror values can come from either a function parameter with 1905 // swifterror attribute or an alloca with swifterror attribute. 1906 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1907 if (Arg->hasSwiftErrorAttr()) 1908 return false; 1909 } 1910 1911 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1912 if (Alloca->isSwiftError()) 1913 return false; 1914 } 1915 } 1916 1917 // See if we can handle this address. 1918 Address Addr; 1919 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1920 return false; 1921 1922 // Fold the following sign-/zero-extend into the load instruction. 
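  // If the load's only user is a single zext/sext, pick a sign- or
  // zero-extending load variant (e.g. LDRSB/LDRSH rather than LDRB/LDRH plus
  // a separate extend) and map the extend's result to the load.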
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall back to
  // SelectionDAG. There is also an ordering issue when both instructions are
  // in different basic blocks.
  // 1.) The load instruction is selected by FastISel, but not the integer
  //     extend. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer
  //     extend by FastISel. This happens if there are instructions between the
  //     load and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will
    // remove it when it selects the integer extend.
    unsigned Reg = lookUpRegForValue(IntExtVal);
    auto *MI = MRI.getUniqueVRegDef(Reg);
    if (!MI) {
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
          std::prev(FuncInfo.InsertPt)->eraseFromParent();
          ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
        } else
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 /*IsKill=*/true,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the
    // instructions that have been emitted by the integer extend lowering code
    // and use the result from the load instruction directly.
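    // In practice this erases the UBFM/SBFM (and any SUBREG_TO_REG or COPY
    // feeding it) that the extend lowering produced, following each erased
    // instruction's first register operand back to its unique definition.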
    while (MI) {
      Reg = 0;
      for (auto &Opnd : MI->uses()) {
        if (Opnd.isReg()) {
          Reg = Opnd.getReg();
          break;
        }
      }
      MI->eraseFromParent();
      MI = nullptr;
      if (Reg)
        MI = MRI.getUniqueVRegDef(Reg);
    }
    updateValueMap(IntExtVal, ResultReg);
    return true;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
                                       unsigned AddrReg,
                                       MachineMemOperand *MMO) {
  unsigned Opc;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:  Opc = AArch64::STLRB; break;
  case MVT::i16: Opc = AArch64::STLRH; break;
  case MVT::i32: Opc = AArch64::STLRW; break;
  case MVT::i64: Opc = AArch64::STLRX; break;
  }

  const MCInstrDesc &II = TII.get(Opc);
  SrcReg = constrainOperandRegClass(II, SrcReg, 0);
  AddrReg = constrainOperandRegClass(II, AddrReg, 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addReg(SrcReg)
      .addReg(AddrReg)
      .addMemOperand(MMO);
  return true;
}

bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return false;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
      AArch64::STURSi,   AArch64::STURDi },
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
      AArch64::STRSui,   AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX,  AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW,  AArch64::STRDroW }
  };

  unsigned Opc;
  bool VTIsi1 = false;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
  case MVT::i8:  Opc = OpcTable[Idx][0]; break;
  case MVT::i16: Opc = OpcTable[Idx][1]; break;
  case MVT::i32: Opc = OpcTable[Idx][2]; break;
  case MVT::i64: Opc = OpcTable[Idx][3]; break;
  case MVT::f32: Opc = OpcTable[Idx][4]; break;
  case MVT::f64: Opc = OpcTable[Idx][5]; break;
  }

  // Storing an i1 requires special handling.
  if (VTIsi1 && SrcReg != AArch64::WZR) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
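  // A store defines no registers, so II.getNumDefs() is 0 and SrcReg is
  // constrained against the register class expected for operand 0.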
2084 const MCInstrDesc &II = TII.get(Opc); 2085 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2086 MachineInstrBuilder MIB = 2087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2088 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2089 2090 return true; 2091 } 2092 2093 bool AArch64FastISel::selectStore(const Instruction *I) { 2094 MVT VT; 2095 const Value *Op0 = I->getOperand(0); 2096 // Verify we have a legal type before going any further. Currently, we handle 2097 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2098 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2099 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2100 return false; 2101 2102 const Value *PtrV = I->getOperand(1); 2103 if (TLI.supportSwiftError()) { 2104 // Swifterror values can come from either a function parameter with 2105 // swifterror attribute or an alloca with swifterror attribute. 2106 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2107 if (Arg->hasSwiftErrorAttr()) 2108 return false; 2109 } 2110 2111 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2112 if (Alloca->isSwiftError()) 2113 return false; 2114 } 2115 } 2116 2117 // Get the value to be stored into a register. Use the zero register directly 2118 // when possible to avoid an unnecessary copy and a wasted register. 2119 unsigned SrcReg = 0; 2120 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2121 if (CI->isZero()) 2122 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2123 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2124 if (CF->isZero() && !CF->isNegative()) { 2125 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2126 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2127 } 2128 } 2129 2130 if (!SrcReg) 2131 SrcReg = getRegForValue(Op0); 2132 2133 if (!SrcReg) 2134 return false; 2135 2136 auto *SI = cast<StoreInst>(I); 2137 2138 // Try to emit a STLR for seq_cst/release. 2139 if (SI->isAtomic()) { 2140 AtomicOrdering Ord = SI->getOrdering(); 2141 // The non-atomic instructions are sufficient for relaxed stores. 2142 if (isReleaseOrStronger(Ord)) { 2143 // The STLR addressing mode only supports a base reg; pass that directly. 2144 unsigned AddrReg = getRegForValue(PtrV); 2145 return emitStoreRelease(VT, SrcReg, AddrReg, 2146 createMachineMemOperandFor(I)); 2147 } 2148 } 2149 2150 // See if we can handle this address. 2151 Address Addr; 2152 if (!computeAddress(PtrV, Addr, Op0->getType())) 2153 return false; 2154 2155 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2156 return false; 2157 return true; 2158 } 2159 2160 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2161 switch (Pred) { 2162 case CmpInst::FCMP_ONE: 2163 case CmpInst::FCMP_UEQ: 2164 default: 2165 // AL is our "false" for now. The other two need more compares. 
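    // (selectBranch and selectCmp handle FCMP_ONE/FCMP_UEQ by emitting a
    // second branch or CSINC with an extra condition code.)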
2166 return AArch64CC::AL; 2167 case CmpInst::ICMP_EQ: 2168 case CmpInst::FCMP_OEQ: 2169 return AArch64CC::EQ; 2170 case CmpInst::ICMP_SGT: 2171 case CmpInst::FCMP_OGT: 2172 return AArch64CC::GT; 2173 case CmpInst::ICMP_SGE: 2174 case CmpInst::FCMP_OGE: 2175 return AArch64CC::GE; 2176 case CmpInst::ICMP_UGT: 2177 case CmpInst::FCMP_UGT: 2178 return AArch64CC::HI; 2179 case CmpInst::FCMP_OLT: 2180 return AArch64CC::MI; 2181 case CmpInst::ICMP_ULE: 2182 case CmpInst::FCMP_OLE: 2183 return AArch64CC::LS; 2184 case CmpInst::FCMP_ORD: 2185 return AArch64CC::VC; 2186 case CmpInst::FCMP_UNO: 2187 return AArch64CC::VS; 2188 case CmpInst::FCMP_UGE: 2189 return AArch64CC::PL; 2190 case CmpInst::ICMP_SLT: 2191 case CmpInst::FCMP_ULT: 2192 return AArch64CC::LT; 2193 case CmpInst::ICMP_SLE: 2194 case CmpInst::FCMP_ULE: 2195 return AArch64CC::LE; 2196 case CmpInst::FCMP_UNE: 2197 case CmpInst::ICMP_NE: 2198 return AArch64CC::NE; 2199 case CmpInst::ICMP_UGE: 2200 return AArch64CC::HS; 2201 case CmpInst::ICMP_ULT: 2202 return AArch64CC::LO; 2203 } 2204 } 2205 2206 /// \brief Try to emit a combined compare-and-branch instruction. 2207 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2208 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2209 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2210 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2211 2212 const Value *LHS = CI->getOperand(0); 2213 const Value *RHS = CI->getOperand(1); 2214 2215 MVT VT; 2216 if (!isTypeSupported(LHS->getType(), VT)) 2217 return false; 2218 2219 unsigned BW = VT.getSizeInBits(); 2220 if (BW > 64) 2221 return false; 2222 2223 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2224 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2225 2226 // Try to take advantage of fallthrough opportunities. 
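  // If the true block is this block's layout successor, branching on the
  // inverse predicate to the false block lets the true edge fall through.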
2227 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2228 std::swap(TBB, FBB); 2229 Predicate = CmpInst::getInversePredicate(Predicate); 2230 } 2231 2232 int TestBit = -1; 2233 bool IsCmpNE; 2234 switch (Predicate) { 2235 default: 2236 return false; 2237 case CmpInst::ICMP_EQ: 2238 case CmpInst::ICMP_NE: 2239 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2240 std::swap(LHS, RHS); 2241 2242 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2243 return false; 2244 2245 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2246 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2247 const Value *AndLHS = AI->getOperand(0); 2248 const Value *AndRHS = AI->getOperand(1); 2249 2250 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2251 if (C->getValue().isPowerOf2()) 2252 std::swap(AndLHS, AndRHS); 2253 2254 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2255 if (C->getValue().isPowerOf2()) { 2256 TestBit = C->getValue().logBase2(); 2257 LHS = AndLHS; 2258 } 2259 } 2260 2261 if (VT == MVT::i1) 2262 TestBit = 0; 2263 2264 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2265 break; 2266 case CmpInst::ICMP_SLT: 2267 case CmpInst::ICMP_SGE: 2268 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2269 return false; 2270 2271 TestBit = BW - 1; 2272 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2273 break; 2274 case CmpInst::ICMP_SGT: 2275 case CmpInst::ICMP_SLE: 2276 if (!isa<ConstantInt>(RHS)) 2277 return false; 2278 2279 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2280 return false; 2281 2282 TestBit = BW - 1; 2283 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2284 break; 2285 } // end switch 2286 2287 static const unsigned OpcTable[2][2][2] = { 2288 { {AArch64::CBZW, AArch64::CBZX }, 2289 {AArch64::CBNZW, AArch64::CBNZX} }, 2290 { {AArch64::TBZW, AArch64::TBZX }, 2291 {AArch64::TBNZW, AArch64::TBNZX} } 2292 }; 2293 2294 bool IsBitTest = TestBit != -1; 2295 bool Is64Bit = BW == 64; 2296 if (TestBit < 32 && TestBit >= 0) 2297 Is64Bit = false; 2298 2299 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2300 const MCInstrDesc &II = TII.get(Opc); 2301 2302 unsigned SrcReg = getRegForValue(LHS); 2303 if (!SrcReg) 2304 return false; 2305 bool SrcIsKill = hasTrivialKill(LHS); 2306 2307 if (BW == 64 && !Is64Bit) 2308 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2309 AArch64::sub_32); 2310 2311 if ((BW < 32) && !IsBitTest) 2312 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); 2313 2314 // Emit the combined compare and branch instruction. 2315 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2316 MachineInstrBuilder MIB = 2317 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2318 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2319 if (IsBitTest) 2320 MIB.addImm(TestBit); 2321 MIB.addMBB(TBB); 2322 2323 finishCondBranch(BI->getParent(), TBB, FBB); 2324 return true; 2325 } 2326 2327 bool AArch64FastISel::selectBranch(const Instruction *I) { 2328 const BranchInst *BI = cast<BranchInst>(I); 2329 if (BI->isUnconditional()) { 2330 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2331 fastEmitBranch(MSucc, BI->getDebugLoc()); 2332 return true; 2333 } 2334 2335 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2336 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2337 2338 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2339 if (CI->hasOneUse() && isValueAvailable(CI)) { 2340 // Try to optimize or fold the cmp. 
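      // FCMP_FALSE and FCMP_TRUE do not depend on the operands at all, so the
      // conditional branch collapses into an unconditional one below.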
2341 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2342 switch (Predicate) { 2343 default: 2344 break; 2345 case CmpInst::FCMP_FALSE: 2346 fastEmitBranch(FBB, DbgLoc); 2347 return true; 2348 case CmpInst::FCMP_TRUE: 2349 fastEmitBranch(TBB, DbgLoc); 2350 return true; 2351 } 2352 2353 // Try to emit a combined compare-and-branch first. 2354 if (emitCompareAndBranch(BI)) 2355 return true; 2356 2357 // Try to take advantage of fallthrough opportunities. 2358 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2359 std::swap(TBB, FBB); 2360 Predicate = CmpInst::getInversePredicate(Predicate); 2361 } 2362 2363 // Emit the cmp. 2364 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2365 return false; 2366 2367 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2368 // instruction. 2369 AArch64CC::CondCode CC = getCompareCC(Predicate); 2370 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2371 switch (Predicate) { 2372 default: 2373 break; 2374 case CmpInst::FCMP_UEQ: 2375 ExtraCC = AArch64CC::EQ; 2376 CC = AArch64CC::VS; 2377 break; 2378 case CmpInst::FCMP_ONE: 2379 ExtraCC = AArch64CC::MI; 2380 CC = AArch64CC::GT; 2381 break; 2382 } 2383 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2384 2385 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2386 if (ExtraCC != AArch64CC::AL) { 2387 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2388 .addImm(ExtraCC) 2389 .addMBB(TBB); 2390 } 2391 2392 // Emit the branch. 2393 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2394 .addImm(CC) 2395 .addMBB(TBB); 2396 2397 finishCondBranch(BI->getParent(), TBB, FBB); 2398 return true; 2399 } 2400 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2401 uint64_t Imm = CI->getZExtValue(); 2402 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2403 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2404 .addMBB(Target); 2405 2406 // Obtain the branch probability and add the target to the successor list. 2407 if (FuncInfo.BPI) { 2408 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2409 BI->getParent(), Target->getBasicBlock()); 2410 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2411 } else 2412 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2413 return true; 2414 } else { 2415 AArch64CC::CondCode CC = AArch64CC::NE; 2416 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2417 // Fake request the condition, otherwise the intrinsic might be completely 2418 // optimized away. 2419 unsigned CondReg = getRegForValue(BI->getCondition()); 2420 if (!CondReg) 2421 return false; 2422 2423 // Emit the branch. 2424 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2425 .addImm(CC) 2426 .addMBB(TBB); 2427 2428 finishCondBranch(BI->getParent(), TBB, FBB); 2429 return true; 2430 } 2431 } 2432 2433 unsigned CondReg = getRegForValue(BI->getCondition()); 2434 if (CondReg == 0) 2435 return false; 2436 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2437 2438 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
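  // That is, emit "tbnz wN, #0, <true-bb>" (or tbz, after swapping the
  // successors to exploit fallthrough) instead of a cmp/b.cc pair.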
2439 unsigned Opcode = AArch64::TBNZW; 2440 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2441 std::swap(TBB, FBB); 2442 Opcode = AArch64::TBZW; 2443 } 2444 2445 const MCInstrDesc &II = TII.get(Opcode); 2446 unsigned ConstrainedCondReg 2447 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2448 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2449 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) 2450 .addImm(0) 2451 .addMBB(TBB); 2452 2453 finishCondBranch(BI->getParent(), TBB, FBB); 2454 return true; 2455 } 2456 2457 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2458 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2459 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2460 if (AddrReg == 0) 2461 return false; 2462 2463 // Emit the indirect branch. 2464 const MCInstrDesc &II = TII.get(AArch64::BR); 2465 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2467 2468 // Make sure the CFG is up-to-date. 2469 for (auto *Succ : BI->successors()) 2470 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2471 2472 return true; 2473 } 2474 2475 bool AArch64FastISel::selectCmp(const Instruction *I) { 2476 const CmpInst *CI = cast<CmpInst>(I); 2477 2478 // Vectors of i1 are weird: bail out. 2479 if (CI->getType()->isVectorTy()) 2480 return false; 2481 2482 // Try to optimize or fold the cmp. 2483 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2484 unsigned ResultReg = 0; 2485 switch (Predicate) { 2486 default: 2487 break; 2488 case CmpInst::FCMP_FALSE: 2489 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2491 TII.get(TargetOpcode::COPY), ResultReg) 2492 .addReg(AArch64::WZR, getKillRegState(true)); 2493 break; 2494 case CmpInst::FCMP_TRUE: 2495 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2496 break; 2497 } 2498 2499 if (ResultReg) { 2500 updateValueMap(I, ResultReg); 2501 return true; 2502 } 2503 2504 // Emit the cmp. 2505 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2506 return false; 2507 2508 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2509 2510 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2511 // condition codes are inverted, because they are used by CSINC. 2512 static unsigned CondCodeTable[2][2] = { 2513 { AArch64CC::NE, AArch64CC::VC }, 2514 { AArch64CC::PL, AArch64CC::LE } 2515 }; 2516 unsigned *CondCodes = nullptr; 2517 switch (Predicate) { 2518 default: 2519 break; 2520 case CmpInst::FCMP_UEQ: 2521 CondCodes = &CondCodeTable[0][0]; 2522 break; 2523 case CmpInst::FCMP_ONE: 2524 CondCodes = &CondCodeTable[1][0]; 2525 break; 2526 } 2527 2528 if (CondCodes) { 2529 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2530 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2531 TmpReg1) 2532 .addReg(AArch64::WZR, getKillRegState(true)) 2533 .addReg(AArch64::WZR, getKillRegState(true)) 2534 .addImm(CondCodes[0]); 2535 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2536 ResultReg) 2537 .addReg(TmpReg1, getKillRegState(true)) 2538 .addReg(AArch64::WZR, getKillRegState(true)) 2539 .addImm(CondCodes[1]); 2540 2541 updateValueMap(I, ResultReg); 2542 return true; 2543 } 2544 2545 // Now set a register based on the comparison. 
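  // CSINC Wd, WZR, WZR, invert(CC) is the "cset" idiom: it yields 1 when CC
  // holds and 0 otherwise.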
  AArch64CC::CondCode CC = getCompareCC(Predicate);
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true))
      .addReg(AArch64::WZR, getKillRegState(true))
      .addImm(invertedCC);

  updateValueMap(I, ResultReg);
  return true;
}

/// \brief Optimize selects of i1 if one of the operands has a 'true' or
/// 'false' value.
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
  if (!SI->getType()->isIntegerTy(1))
    return false;

  const Value *Src1Val, *Src2Val;
  unsigned Opc = 0;
  bool NeedExtraOp = false;
  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
    if (CI->isOne()) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getFalseValue();
      Opc = AArch64::ORRWrr;
    } else {
      assert(CI->isZero());
      Src1Val = SI->getFalseValue();
      Src2Val = SI->getCondition();
      Opc = AArch64::BICWrr;
    }
  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
    if (CI->isOne()) {
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ORRWrr;
      NeedExtraOp = true;
    } else {
      assert(CI->isZero());
      Src1Val = SI->getCondition();
      Src2Val = SI->getTrueValue();
      Opc = AArch64::ANDWrr;
    }
  }

  if (!Opc)
    return false;

  unsigned Src1Reg = getRegForValue(Src1Val);
  if (!Src1Reg)
    return false;
  bool Src1IsKill = hasTrivialKill(Src1Val);

  unsigned Src2Reg = getRegForValue(Src2Val);
  if (!Src2Reg)
    return false;
  bool Src2IsKill = hasTrivialKill(Src2Val);

  if (NeedExtraOp) {
    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
    Src1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
                                       Src1IsKill, Src2Reg, Src2IsKill);
  updateValueMap(SI, ResultReg);
  return true;
}

bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  MVT VT;
  if (!isTypeSupported(I->getType(), VT))
    return false;

  unsigned Opc;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;
    break;
  }

  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
  AArch64CC::CondCode CC = AArch64CC::NE;
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;

  if (optimizeSelect(SI))
    return true;

  // Try to pick up the flags, so we don't have to emit another compare.
  if (foldXALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition to force emission of the XALU intrinsic.
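    // Without this use, the overflow intrinsic could be dropped entirely even
    // though this select still needs the flags it sets.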
2659 unsigned CondReg = getRegForValue(Cond); 2660 if (!CondReg) 2661 return false; 2662 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2663 isValueAvailable(Cond)) { 2664 const auto *Cmp = cast<CmpInst>(Cond); 2665 // Try to optimize or fold the cmp. 2666 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2667 const Value *FoldSelect = nullptr; 2668 switch (Predicate) { 2669 default: 2670 break; 2671 case CmpInst::FCMP_FALSE: 2672 FoldSelect = SI->getFalseValue(); 2673 break; 2674 case CmpInst::FCMP_TRUE: 2675 FoldSelect = SI->getTrueValue(); 2676 break; 2677 } 2678 2679 if (FoldSelect) { 2680 unsigned SrcReg = getRegForValue(FoldSelect); 2681 if (!SrcReg) 2682 return false; 2683 unsigned UseReg = lookUpRegForValue(SI); 2684 if (UseReg) 2685 MRI.clearKillFlags(UseReg); 2686 2687 updateValueMap(I, SrcReg); 2688 return true; 2689 } 2690 2691 // Emit the cmp. 2692 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2693 return false; 2694 2695 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2696 CC = getCompareCC(Predicate); 2697 switch (Predicate) { 2698 default: 2699 break; 2700 case CmpInst::FCMP_UEQ: 2701 ExtraCC = AArch64CC::EQ; 2702 CC = AArch64CC::VS; 2703 break; 2704 case CmpInst::FCMP_ONE: 2705 ExtraCC = AArch64CC::MI; 2706 CC = AArch64CC::GT; 2707 break; 2708 } 2709 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2710 } else { 2711 unsigned CondReg = getRegForValue(Cond); 2712 if (!CondReg) 2713 return false; 2714 bool CondIsKill = hasTrivialKill(Cond); 2715 2716 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2717 CondReg = constrainOperandRegClass(II, CondReg, 1); 2718 2719 // Emit a TST instruction (ANDS wzr, reg, #imm). 2720 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2721 AArch64::WZR) 2722 .addReg(CondReg, getKillRegState(CondIsKill)) 2723 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2724 } 2725 2726 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2727 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2728 2729 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2730 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2731 2732 if (!Src1Reg || !Src2Reg) 2733 return false; 2734 2735 if (ExtraCC != AArch64CC::AL) { 2736 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2737 Src2IsKill, ExtraCC); 2738 Src2IsKill = true; 2739 } 2740 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2741 Src2IsKill, CC); 2742 updateValueMap(I, ResultReg); 2743 return true; 2744 } 2745 2746 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2747 Value *V = I->getOperand(0); 2748 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2749 return false; 2750 2751 unsigned Op = getRegForValue(V); 2752 if (Op == 0) 2753 return false; 2754 2755 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2756 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2757 ResultReg).addReg(Op); 2758 updateValueMap(I, ResultReg); 2759 return true; 2760 } 2761 2762 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2763 Value *V = I->getOperand(0); 2764 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2765 return false; 2766 2767 unsigned Op = getRegForValue(V); 2768 if (Op == 0) 2769 return false; 2770 2771 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2773 
ResultReg).addReg(Op); 2774 updateValueMap(I, ResultReg); 2775 return true; 2776 } 2777 2778 // FPToUI and FPToSI 2779 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2780 MVT DestVT; 2781 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2782 return false; 2783 2784 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2785 if (SrcReg == 0) 2786 return false; 2787 2788 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2789 if (SrcVT == MVT::f128) 2790 return false; 2791 2792 unsigned Opc; 2793 if (SrcVT == MVT::f64) { 2794 if (Signed) 2795 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2796 else 2797 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2798 } else { 2799 if (Signed) 2800 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2801 else 2802 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2803 } 2804 unsigned ResultReg = createResultReg( 2805 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2806 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2807 .addReg(SrcReg); 2808 updateValueMap(I, ResultReg); 2809 return true; 2810 } 2811 2812 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2813 MVT DestVT; 2814 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2815 return false; 2816 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) && 2817 "Unexpected value type."); 2818 2819 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2820 if (!SrcReg) 2821 return false; 2822 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2823 2824 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2825 2826 // Handle sign-extension. 2827 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2828 SrcReg = 2829 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2830 if (!SrcReg) 2831 return false; 2832 SrcIsKill = true; 2833 } 2834 2835 unsigned Opc; 2836 if (SrcVT == MVT::i64) { 2837 if (Signed) 2838 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2839 else 2840 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2841 } else { 2842 if (Signed) 2843 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2844 else 2845 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2846 } 2847 2848 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2849 SrcIsKill); 2850 updateValueMap(I, ResultReg); 2851 return true; 2852 } 2853 2854 bool AArch64FastISel::fastLowerArguments() { 2855 if (!FuncInfo.CanLowerReturn) 2856 return false; 2857 2858 const Function *F = FuncInfo.Fn; 2859 if (F->isVarArg()) 2860 return false; 2861 2862 CallingConv::ID CC = F->getCallingConv(); 2863 if (CC != CallingConv::C && CC != CallingConv::Swift) 2864 return false; 2865 2866 // Only handle simple cases of up to 8 GPR and FPR each. 2867 unsigned GPRCnt = 0; 2868 unsigned FPRCnt = 0; 2869 unsigned Idx = 0; 2870 for (auto const &Arg : F->args()) { 2871 // The first argument is at index 1. 
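    // (Attribute index 0 refers to the return value, so argument attributes
    // start at index 1.)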
2872 ++Idx; 2873 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 2874 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2875 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2876 F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || 2877 F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || 2878 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 2879 return false; 2880 2881 Type *ArgTy = Arg.getType(); 2882 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2883 return false; 2884 2885 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2886 if (!ArgVT.isSimple()) 2887 return false; 2888 2889 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2890 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2891 return false; 2892 2893 if (VT.isVector() && 2894 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2895 return false; 2896 2897 if (VT >= MVT::i1 && VT <= MVT::i64) 2898 ++GPRCnt; 2899 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2900 VT.is128BitVector()) 2901 ++FPRCnt; 2902 else 2903 return false; 2904 2905 if (GPRCnt > 8 || FPRCnt > 8) 2906 return false; 2907 } 2908 2909 static const MCPhysReg Registers[6][8] = { 2910 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2911 AArch64::W5, AArch64::W6, AArch64::W7 }, 2912 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2913 AArch64::X5, AArch64::X6, AArch64::X7 }, 2914 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2915 AArch64::H5, AArch64::H6, AArch64::H7 }, 2916 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2917 AArch64::S5, AArch64::S6, AArch64::S7 }, 2918 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2919 AArch64::D5, AArch64::D6, AArch64::D7 }, 2920 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2921 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2922 }; 2923 2924 unsigned GPRIdx = 0; 2925 unsigned FPRIdx = 0; 2926 for (auto const &Arg : F->args()) { 2927 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2928 unsigned SrcReg; 2929 const TargetRegisterClass *RC; 2930 if (VT >= MVT::i1 && VT <= MVT::i32) { 2931 SrcReg = Registers[0][GPRIdx++]; 2932 RC = &AArch64::GPR32RegClass; 2933 VT = MVT::i32; 2934 } else if (VT == MVT::i64) { 2935 SrcReg = Registers[1][GPRIdx++]; 2936 RC = &AArch64::GPR64RegClass; 2937 } else if (VT == MVT::f16) { 2938 SrcReg = Registers[2][FPRIdx++]; 2939 RC = &AArch64::FPR16RegClass; 2940 } else if (VT == MVT::f32) { 2941 SrcReg = Registers[3][FPRIdx++]; 2942 RC = &AArch64::FPR32RegClass; 2943 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2944 SrcReg = Registers[4][FPRIdx++]; 2945 RC = &AArch64::FPR64RegClass; 2946 } else if (VT.is128BitVector()) { 2947 SrcReg = Registers[5][FPRIdx++]; 2948 RC = &AArch64::FPR128RegClass; 2949 } else 2950 llvm_unreachable("Unexpected value type."); 2951 2952 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2953 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2954 // Without this, EmitLiveInCopies may eliminate the livein if its only 2955 // use is a bitcast (which isn't turned into an instruction). 
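    // addLiveIn registered SrcReg as a function live-in and returned a vreg
    // for it; the extra COPY below gives that vreg a concrete use.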
2956 unsigned ResultReg = createResultReg(RC); 2957 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2958 TII.get(TargetOpcode::COPY), ResultReg) 2959 .addReg(DstReg, getKillRegState(true)); 2960 updateValueMap(&Arg, ResultReg); 2961 } 2962 return true; 2963 } 2964 2965 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 2966 SmallVectorImpl<MVT> &OutVTs, 2967 unsigned &NumBytes) { 2968 CallingConv::ID CC = CLI.CallConv; 2969 SmallVector<CCValAssign, 16> ArgLocs; 2970 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 2971 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 2972 2973 // Get a count of how many bytes are to be pushed on the stack. 2974 NumBytes = CCInfo.getNextStackOffset(); 2975 2976 // Issue CALLSEQ_START 2977 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 2978 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 2979 .addImm(NumBytes); 2980 2981 // Process the args. 2982 for (CCValAssign &VA : ArgLocs) { 2983 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 2984 MVT ArgVT = OutVTs[VA.getValNo()]; 2985 2986 unsigned ArgReg = getRegForValue(ArgVal); 2987 if (!ArgReg) 2988 return false; 2989 2990 // Handle arg promotion: SExt, ZExt, AExt. 2991 switch (VA.getLocInfo()) { 2992 case CCValAssign::Full: 2993 break; 2994 case CCValAssign::SExt: { 2995 MVT DestVT = VA.getLocVT(); 2996 MVT SrcVT = ArgVT; 2997 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 2998 if (!ArgReg) 2999 return false; 3000 break; 3001 } 3002 case CCValAssign::AExt: 3003 // Intentional fall-through. 3004 case CCValAssign::ZExt: { 3005 MVT DestVT = VA.getLocVT(); 3006 MVT SrcVT = ArgVT; 3007 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3008 if (!ArgReg) 3009 return false; 3010 break; 3011 } 3012 default: 3013 llvm_unreachable("Unknown arg promotion!"); 3014 } 3015 3016 // Now copy/store arg to correct locations. 3017 if (VA.isRegLoc() && !VA.needsCustom()) { 3018 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3019 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3020 CLI.OutRegs.push_back(VA.getLocReg()); 3021 } else if (VA.needsCustom()) { 3022 // FIXME: Handle custom args. 3023 return false; 3024 } else { 3025 assert(VA.isMemLoc() && "Assuming store on stack."); 3026 3027 // Don't emit stores for undef values. 3028 if (isa<UndefValue>(ArgVal)) 3029 continue; 3030 3031 // Need to store on the stack. 3032 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3033 3034 unsigned BEAlign = 0; 3035 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3036 BEAlign = 8 - ArgSize; 3037 3038 Address Addr; 3039 Addr.setKind(Address::RegBase); 3040 Addr.setReg(AArch64::SP); 3041 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3042 3043 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 3044 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3045 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3046 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3047 3048 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3049 return false; 3050 } 3051 } 3052 return true; 3053 } 3054 3055 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3056 unsigned NumBytes) { 3057 CallingConv::ID CC = CLI.CallConv; 3058 3059 // Issue CALLSEQ_END 3060 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3061 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3062 .addImm(NumBytes).addImm(0); 3063 3064 // Now the return value. 
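  // Copy the result out of its ABI-assigned physical register; only calls
  // returning a single register are handled here.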
3065 if (RetVT != MVT::isVoid) { 3066 SmallVector<CCValAssign, 16> RVLocs; 3067 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3068 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3069 3070 // Only handle a single return value. 3071 if (RVLocs.size() != 1) 3072 return false; 3073 3074 // Copy all of the result registers out of their specified physreg. 3075 MVT CopyVT = RVLocs[0].getValVT(); 3076 3077 // TODO: Handle big-endian results 3078 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3079 return false; 3080 3081 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3082 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3083 TII.get(TargetOpcode::COPY), ResultReg) 3084 .addReg(RVLocs[0].getLocReg()); 3085 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3086 3087 CLI.ResultReg = ResultReg; 3088 CLI.NumResultRegs = 1; 3089 } 3090 3091 return true; 3092 } 3093 3094 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3095 CallingConv::ID CC = CLI.CallConv; 3096 bool IsTailCall = CLI.IsTailCall; 3097 bool IsVarArg = CLI.IsVarArg; 3098 const Value *Callee = CLI.Callee; 3099 MCSymbol *Symbol = CLI.Symbol; 3100 3101 if (!Callee && !Symbol) 3102 return false; 3103 3104 // Allow SelectionDAG isel to handle tail calls. 3105 if (IsTailCall) 3106 return false; 3107 3108 CodeModel::Model CM = TM.getCodeModel(); 3109 // Only support the small and large code model. 3110 if (CM != CodeModel::Small && CM != CodeModel::Large) 3111 return false; 3112 3113 // FIXME: Add large code model support for ELF. 3114 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3115 return false; 3116 3117 // Let SDISel handle vararg functions. 3118 if (IsVarArg) 3119 return false; 3120 3121 // FIXME: Only handle *simple* calls for now. 3122 MVT RetVT; 3123 if (CLI.RetTy->isVoidTy()) 3124 RetVT = MVT::isVoid; 3125 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3126 return false; 3127 3128 for (auto Flag : CLI.OutFlags) 3129 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3130 Flag.isSwiftSelf() || Flag.isSwiftError()) 3131 return false; 3132 3133 // Set up the argument vectors. 3134 SmallVector<MVT, 16> OutVTs; 3135 OutVTs.reserve(CLI.OutVals.size()); 3136 3137 for (auto *Val : CLI.OutVals) { 3138 MVT VT; 3139 if (!isTypeLegal(Val->getType(), VT) && 3140 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3141 return false; 3142 3143 // We don't handle vector parameters yet. 3144 if (VT.isVector() || VT.getSizeInBits() > 64) 3145 return false; 3146 3147 OutVTs.push_back(VT); 3148 } 3149 3150 Address Addr; 3151 if (Callee && !computeCallAddress(Callee, Addr)) 3152 return false; 3153 3154 // Handle the arguments now that we've gotten them. 3155 unsigned NumBytes; 3156 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3157 return false; 3158 3159 // Issue the call. 3160 MachineInstrBuilder MIB; 3161 if (CM == CodeModel::Small) { 3162 const MCInstrDesc &II = TII.get(Addr.getReg() ? 
AArch64::BLR : AArch64::BL); 3163 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3164 if (Symbol) 3165 MIB.addSym(Symbol, 0); 3166 else if (Addr.getGlobalValue()) 3167 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3168 else if (Addr.getReg()) { 3169 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3170 MIB.addReg(Reg); 3171 } else 3172 return false; 3173 } else { 3174 unsigned CallReg = 0; 3175 if (Symbol) { 3176 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3177 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3178 ADRPReg) 3179 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3180 3181 CallReg = createResultReg(&AArch64::GPR64RegClass); 3182 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3183 TII.get(AArch64::LDRXui), CallReg) 3184 .addReg(ADRPReg) 3185 .addSym(Symbol, 3186 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3187 } else if (Addr.getGlobalValue()) 3188 CallReg = materializeGV(Addr.getGlobalValue()); 3189 else if (Addr.getReg()) 3190 CallReg = Addr.getReg(); 3191 3192 if (!CallReg) 3193 return false; 3194 3195 const MCInstrDesc &II = TII.get(AArch64::BLR); 3196 CallReg = constrainOperandRegClass(II, CallReg, 0); 3197 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3198 } 3199 3200 // Add implicit physical register uses to the call. 3201 for (auto Reg : CLI.OutRegs) 3202 MIB.addReg(Reg, RegState::Implicit); 3203 3204 // Add a register mask with the call-preserved registers. 3205 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3206 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3207 3208 CLI.Call = MIB; 3209 3210 // Finish off the call including any return values. 3211 return finishCall(CLI, RetVT, NumBytes); 3212 } 3213 3214 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3215 if (Alignment) 3216 return Len / Alignment <= 4; 3217 else 3218 return Len < 32; 3219 } 3220 3221 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3222 uint64_t Len, unsigned Alignment) { 3223 // Make sure we don't bloat code by inlining very large memcpy's. 3224 if (!isMemCpySmall(Len, Alignment)) 3225 return false; 3226 3227 int64_t UnscaledOffset = 0; 3228 Address OrigDest = Dest; 3229 Address OrigSrc = Src; 3230 3231 while (Len) { 3232 MVT VT; 3233 if (!Alignment || Alignment >= 8) { 3234 if (Len >= 8) 3235 VT = MVT::i64; 3236 else if (Len >= 4) 3237 VT = MVT::i32; 3238 else if (Len >= 2) 3239 VT = MVT::i16; 3240 else { 3241 VT = MVT::i8; 3242 } 3243 } else { 3244 // Bound based on alignment. 3245 if (Len >= 4 && Alignment == 4) 3246 VT = MVT::i32; 3247 else if (Len >= 2 && Alignment == 2) 3248 VT = MVT::i16; 3249 else { 3250 VT = MVT::i8; 3251 } 3252 } 3253 3254 unsigned ResultReg = emitLoad(VT, VT, Src); 3255 if (!ResultReg) 3256 return false; 3257 3258 if (!emitStore(VT, ResultReg, Dest)) 3259 return false; 3260 3261 int64_t Size = VT.getSizeInBits() / 8; 3262 Len -= Size; 3263 UnscaledOffset += Size; 3264 3265 // We need to recompute the unscaled offset for each iteration. 3266 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3267 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3268 } 3269 3270 return true; 3271 } 3272 3273 /// \brief Check if it is possible to fold the condition from the XALU intrinsic 3274 /// into the user. The condition code will only be updated on success. 
3275 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3276 const Instruction *I, 3277 const Value *Cond) { 3278 if (!isa<ExtractValueInst>(Cond)) 3279 return false; 3280 3281 const auto *EV = cast<ExtractValueInst>(Cond); 3282 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3283 return false; 3284 3285 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3286 MVT RetVT; 3287 const Function *Callee = II->getCalledFunction(); 3288 Type *RetTy = 3289 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3290 if (!isTypeLegal(RetTy, RetVT)) 3291 return false; 3292 3293 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3294 return false; 3295 3296 const Value *LHS = II->getArgOperand(0); 3297 const Value *RHS = II->getArgOperand(1); 3298 3299 // Canonicalize immediate to the RHS. 3300 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3301 isCommutativeIntrinsic(II)) 3302 std::swap(LHS, RHS); 3303 3304 // Simplify multiplies. 3305 Intrinsic::ID IID = II->getIntrinsicID(); 3306 switch (IID) { 3307 default: 3308 break; 3309 case Intrinsic::smul_with_overflow: 3310 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3311 if (C->getValue() == 2) 3312 IID = Intrinsic::sadd_with_overflow; 3313 break; 3314 case Intrinsic::umul_with_overflow: 3315 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3316 if (C->getValue() == 2) 3317 IID = Intrinsic::uadd_with_overflow; 3318 break; 3319 } 3320 3321 AArch64CC::CondCode TmpCC; 3322 switch (IID) { 3323 default: 3324 return false; 3325 case Intrinsic::sadd_with_overflow: 3326 case Intrinsic::ssub_with_overflow: 3327 TmpCC = AArch64CC::VS; 3328 break; 3329 case Intrinsic::uadd_with_overflow: 3330 TmpCC = AArch64CC::HS; 3331 break; 3332 case Intrinsic::usub_with_overflow: 3333 TmpCC = AArch64CC::LO; 3334 break; 3335 case Intrinsic::smul_with_overflow: 3336 case Intrinsic::umul_with_overflow: 3337 TmpCC = AArch64CC::NE; 3338 break; 3339 } 3340 3341 // Check if both instructions are in the same basic block. 3342 if (!isValueAvailable(II)) 3343 return false; 3344 3345 // Make sure nothing is in the way 3346 BasicBlock::const_iterator Start(I); 3347 BasicBlock::const_iterator End(II); 3348 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3349 // We only expect extractvalue instructions between the intrinsic and the 3350 // instruction to be selected. 3351 if (!isa<ExtractValueInst>(Itr)) 3352 return false; 3353 3354 // Check that the extractvalue operand comes from the intrinsic. 3355 const auto *EVI = cast<ExtractValueInst>(Itr); 3356 if (EVI->getAggregateOperand() != II) 3357 return false; 3358 } 3359 3360 CC = TmpCC; 3361 return true; 3362 } 3363 3364 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3365 // FIXME: Handle more intrinsics. 3366 switch (II->getIntrinsicID()) { 3367 default: return false; 3368 case Intrinsic::frameaddress: { 3369 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3370 MFI.setFrameAddressIsTaken(true); 3371 3372 const AArch64RegisterInfo *RegInfo = 3373 static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo()); 3374 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3375 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3376 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3377 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3378 // Recursively load frame address 3379 // ldr x0, [fp] 3380 // ldr x0, [x0] 3381 // ldr x0, [x0] 3382 // ... 
3383 unsigned DestReg; 3384 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3385 while (Depth--) { 3386 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3387 SrcReg, /*IsKill=*/true, 0); 3388 assert(DestReg && "Unexpected LDR instruction emission failure."); 3389 SrcReg = DestReg; 3390 } 3391 3392 updateValueMap(II, SrcReg); 3393 return true; 3394 } 3395 case Intrinsic::memcpy: 3396 case Intrinsic::memmove: { 3397 const auto *MTI = cast<MemTransferInst>(II); 3398 // Don't handle volatile. 3399 if (MTI->isVolatile()) 3400 return false; 3401 3402 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3403 // we would emit dead code because we don't currently handle memmoves. 3404 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3405 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3406 // Small memcpy's are common enough that we want to do them without a call 3407 // if possible. 3408 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3409 unsigned Alignment = MTI->getAlignment(); 3410 if (isMemCpySmall(Len, Alignment)) { 3411 Address Dest, Src; 3412 if (!computeAddress(MTI->getRawDest(), Dest) || 3413 !computeAddress(MTI->getRawSource(), Src)) 3414 return false; 3415 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3416 return true; 3417 } 3418 } 3419 3420 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3421 return false; 3422 3423 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3424 // Fast instruction selection doesn't support the special 3425 // address spaces. 3426 return false; 3427 3428 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3429 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); 3430 } 3431 case Intrinsic::memset: { 3432 const MemSetInst *MSI = cast<MemSetInst>(II); 3433 // Don't handle volatile. 3434 if (MSI->isVolatile()) 3435 return false; 3436 3437 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3438 return false; 3439 3440 if (MSI->getDestAddressSpace() > 255) 3441 // Fast instruction selection doesn't support the special 3442 // address spaces. 3443 return false; 3444 3445 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 3446 } 3447 case Intrinsic::sin: 3448 case Intrinsic::cos: 3449 case Intrinsic::pow: { 3450 MVT RetVT; 3451 if (!isTypeLegal(II->getType(), RetVT)) 3452 return false; 3453 3454 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3455 return false; 3456 3457 static const RTLIB::Libcall LibCallTable[3][2] = { 3458 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3459 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3460 { RTLIB::POW_F32, RTLIB::POW_F64 } 3461 }; 3462 RTLIB::Libcall LC; 3463 bool Is64Bit = RetVT == MVT::f64; 3464 switch (II->getIntrinsicID()) { 3465 default: 3466 llvm_unreachable("Unexpected intrinsic."); 3467 case Intrinsic::sin: 3468 LC = LibCallTable[0][Is64Bit]; 3469 break; 3470 case Intrinsic::cos: 3471 LC = LibCallTable[1][Is64Bit]; 3472 break; 3473 case Intrinsic::pow: 3474 LC = LibCallTable[2][Is64Bit]; 3475 break; 3476 } 3477 3478 ArgListTy Args; 3479 Args.reserve(II->getNumArgOperands()); 3480 3481 // Populate the argument list. 
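    // There are no AArch64 instructions for sin/cos/pow, so forward the
    // operands to the matching libm routine (e.g. sinf for the f32 variant).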
3482 for (auto &Arg : II->arg_operands()) { 3483 ArgListEntry Entry; 3484 Entry.Val = Arg; 3485 Entry.Ty = Arg->getType(); 3486 Args.push_back(Entry); 3487 } 3488 3489 CallLoweringInfo CLI; 3490 MCContext &Ctx = MF->getContext(); 3491 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3492 TLI.getLibcallName(LC), std::move(Args)); 3493 if (!lowerCallTo(CLI)) 3494 return false; 3495 updateValueMap(II, CLI.ResultReg); 3496 return true; 3497 } 3498 case Intrinsic::fabs: { 3499 MVT VT; 3500 if (!isTypeLegal(II->getType(), VT)) 3501 return false; 3502 3503 unsigned Opc; 3504 switch (VT.SimpleTy) { 3505 default: 3506 return false; 3507 case MVT::f32: 3508 Opc = AArch64::FABSSr; 3509 break; 3510 case MVT::f64: 3511 Opc = AArch64::FABSDr; 3512 break; 3513 } 3514 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3515 if (!SrcReg) 3516 return false; 3517 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3518 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3519 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3520 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3521 updateValueMap(II, ResultReg); 3522 return true; 3523 } 3524 case Intrinsic::trap: { 3525 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3526 .addImm(1); 3527 return true; 3528 } 3529 case Intrinsic::sqrt: { 3530 Type *RetTy = II->getCalledFunction()->getReturnType(); 3531 3532 MVT VT; 3533 if (!isTypeLegal(RetTy, VT)) 3534 return false; 3535 3536 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3537 if (!Op0Reg) 3538 return false; 3539 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3540 3541 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3542 if (!ResultReg) 3543 return false; 3544 3545 updateValueMap(II, ResultReg); 3546 return true; 3547 } 3548 case Intrinsic::sadd_with_overflow: 3549 case Intrinsic::uadd_with_overflow: 3550 case Intrinsic::ssub_with_overflow: 3551 case Intrinsic::usub_with_overflow: 3552 case Intrinsic::smul_with_overflow: 3553 case Intrinsic::umul_with_overflow: { 3554 // This implements the basic lowering of the xalu with overflow intrinsics. 3555 const Function *Callee = II->getCalledFunction(); 3556 auto *Ty = cast<StructType>(Callee->getReturnType()); 3557 Type *RetTy = Ty->getTypeAtIndex(0U); 3558 3559 MVT VT; 3560 if (!isTypeLegal(RetTy, VT)) 3561 return false; 3562 3563 if (VT != MVT::i32 && VT != MVT::i64) 3564 return false; 3565 3566 const Value *LHS = II->getArgOperand(0); 3567 const Value *RHS = II->getArgOperand(1); 3568 // Canonicalize immediate to the RHS. 3569 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3570 isCommutativeIntrinsic(II)) 3571 std::swap(LHS, RHS); 3572 3573 // Simplify multiplies. 
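// A multiply by 2 sets the overflow flag exactly when adding the value to
// itself does, so {s,u}mul.with.overflow(x, 2) can reuse the cheaper
// {s,u}add.with.overflow(x, x) lowering below.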
3574 Intrinsic::ID IID = II->getIntrinsicID(); 3575 switch (IID) { 3576 default: 3577 break; 3578 case Intrinsic::smul_with_overflow: 3579 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3580 if (C->getValue() == 2) { 3581 IID = Intrinsic::sadd_with_overflow; 3582 RHS = LHS; 3583 } 3584 break; 3585 case Intrinsic::umul_with_overflow: 3586 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3587 if (C->getValue() == 2) { 3588 IID = Intrinsic::uadd_with_overflow; 3589 RHS = LHS; 3590 } 3591 break; 3592 } 3593 3594 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3595 AArch64CC::CondCode CC = AArch64CC::Invalid; 3596 switch (IID) { 3597 default: llvm_unreachable("Unexpected intrinsic!"); 3598 case Intrinsic::sadd_with_overflow: 3599 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3600 CC = AArch64CC::VS; 3601 break; 3602 case Intrinsic::uadd_with_overflow: 3603 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3604 CC = AArch64CC::HS; 3605 break; 3606 case Intrinsic::ssub_with_overflow: 3607 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3608 CC = AArch64CC::VS; 3609 break; 3610 case Intrinsic::usub_with_overflow: 3611 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3612 CC = AArch64CC::LO; 3613 break; 3614 case Intrinsic::smul_with_overflow: { 3615 CC = AArch64CC::NE; 3616 unsigned LHSReg = getRegForValue(LHS); 3617 if (!LHSReg) 3618 return false; 3619 bool LHSIsKill = hasTrivialKill(LHS); 3620 3621 unsigned RHSReg = getRegForValue(RHS); 3622 if (!RHSReg) 3623 return false; 3624 bool RHSIsKill = hasTrivialKill(RHS); 3625 3626 if (VT == MVT::i32) { 3627 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3628 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3629 /*IsKill=*/false, 32); 3630 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3631 AArch64::sub_32); 3632 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3633 AArch64::sub_32); 3634 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3635 AArch64_AM::ASR, 31, /*WantResult=*/false); 3636 } else { 3637 assert(VT == MVT::i64 && "Unexpected value type."); 3638 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3639 // reused in the next instruction. 3640 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3641 /*IsKill=*/false); 3642 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3643 RHSReg, RHSIsKill); 3644 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3645 AArch64_AM::ASR, 63, /*WantResult=*/false); 3646 } 3647 break; 3648 } 3649 case Intrinsic::umul_with_overflow: { 3650 CC = AArch64CC::NE; 3651 unsigned LHSReg = getRegForValue(LHS); 3652 if (!LHSReg) 3653 return false; 3654 bool LHSIsKill = hasTrivialKill(LHS); 3655 3656 unsigned RHSReg = getRegForValue(RHS); 3657 if (!RHSReg) 3658 return false; 3659 bool RHSIsKill = hasTrivialKill(RHS); 3660 3661 if (VT == MVT::i32) { 3662 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3663 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3664 /*IsKill=*/false, AArch64_AM::LSR, 32, 3665 /*WantResult=*/false); 3666 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3667 AArch64::sub_32); 3668 } else { 3669 assert(VT == MVT::i64 && "Unexpected value type."); 3670 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3671 // reused in the next instruction. 
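// Roughly: MUL computes the low 64 bits of the product and UMULH the high
// 64 bits; the multiply overflows iff the high half is non-zero, which the
// compare against XZR below exposes as the NE condition.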
3672 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3673 /*IsKill=*/false); 3674 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3675 RHSReg, RHSIsKill); 3676 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3677 /*IsKill=*/false, /*WantResult=*/false); 3678 } 3679 break; 3680 } 3681 } 3682 3683 if (MulReg) { 3684 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3685 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3686 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3687 } 3688 3689 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3690 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3691 /*IsKill=*/true, getInvertedCondCode(CC)); 3692 (void)ResultReg2; 3693 assert((ResultReg1 + 1) == ResultReg2 && 3694 "Nonconsecutive result registers."); 3695 updateValueMap(II, ResultReg1, 2); 3696 return true; 3697 } 3698 } 3699 return false; 3700 } 3701 3702 bool AArch64FastISel::selectRet(const Instruction *I) { 3703 const ReturnInst *Ret = cast<ReturnInst>(I); 3704 const Function &F = *I->getParent()->getParent(); 3705 3706 if (!FuncInfo.CanLowerReturn) 3707 return false; 3708 3709 if (F.isVarArg()) 3710 return false; 3711 3712 if (TLI.supportSwiftError() && 3713 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3714 return false; 3715 3716 if (TLI.supportSplitCSR(FuncInfo.MF)) 3717 return false; 3718 3719 // Build a list of return value registers. 3720 SmallVector<unsigned, 4> RetRegs; 3721 3722 if (Ret->getNumOperands() > 0) { 3723 CallingConv::ID CC = F.getCallingConv(); 3724 SmallVector<ISD::OutputArg, 4> Outs; 3725 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3726 3727 // Analyze operands of the call, assigning locations to each operand. 3728 SmallVector<CCValAssign, 16> ValLocs; 3729 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3730 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3731 : RetCC_AArch64_AAPCS; 3732 CCInfo.AnalyzeReturn(Outs, RetCC); 3733 3734 // Only handle a single return value for now. 3735 if (ValLocs.size() != 1) 3736 return false; 3737 3738 CCValAssign &VA = ValLocs[0]; 3739 const Value *RV = Ret->getOperand(0); 3740 3741 // Don't bother handling odd stuff for now. 3742 if ((VA.getLocInfo() != CCValAssign::Full) && 3743 (VA.getLocInfo() != CCValAssign::BCvt)) 3744 return false; 3745 3746 // Only handle register returns for now. 3747 if (!VA.isRegLoc()) 3748 return false; 3749 3750 unsigned Reg = getRegForValue(RV); 3751 if (Reg == 0) 3752 return false; 3753 3754 unsigned SrcReg = Reg + VA.getValNo(); 3755 unsigned DestReg = VA.getLocReg(); 3756 // Avoid a cross-class copy. This is very unlikely. 3757 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3758 return false; 3759 3760 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3761 if (!RVEVT.isSimple()) 3762 return false; 3763 3764 // Vectors (of > 1 lane) in big endian need tricky handling. 3765 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && 3766 !Subtarget->isLittleEndian()) 3767 return false; 3768 3769 MVT RVVT = RVEVT.getSimpleVT(); 3770 if (RVVT == MVT::f128) 3771 return false; 3772 3773 MVT DestVT = VA.getValVT(); 3774 // Special handling for extended integers. 
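// e.g. an i8 or i16 return value must first be zero-/sign-extended to the
// i32 location type before it can be copied into the return register.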
3775 if (RVVT != DestVT) {
3776 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3777 return false;
3778
3779 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3780 return false;
3781
3782 bool IsZExt = Outs[0].Flags.isZExt();
3783 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3784 if (SrcReg == 0)
3785 return false;
3786 }
3787
3788 // Make the copy.
3789 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3790 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3791
3792 // Add register to return instruction.
3793 RetRegs.push_back(VA.getLocReg());
3794 }
3795
3796 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3797 TII.get(AArch64::RET_ReallyLR));
3798 for (unsigned RetReg : RetRegs)
3799 MIB.addReg(RetReg, RegState::Implicit);
3800 return true;
3801 }
3802
3803 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3804 Type *DestTy = I->getType();
3805 Value *Op = I->getOperand(0);
3806 Type *SrcTy = Op->getType();
3807
3808 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3809 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3810 if (!SrcEVT.isSimple())
3811 return false;
3812 if (!DestEVT.isSimple())
3813 return false;
3814
3815 MVT SrcVT = SrcEVT.getSimpleVT();
3816 MVT DestVT = DestEVT.getSimpleVT();
3817
3818 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3819 SrcVT != MVT::i8)
3820 return false;
3821 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3822 DestVT != MVT::i1)
3823 return false;
3824
3825 unsigned SrcReg = getRegForValue(Op);
3826 if (!SrcReg)
3827 return false;
3828 bool SrcIsKill = hasTrivialKill(Op);
3829
3830 // If we're truncating from i64 to a smaller non-legal type then generate an
3831 // AND. Otherwise, we know the high bits are undefined and a truncate only
3832 // generates a COPY. We cannot also mark the source register as the result
3833 // register, because this can incorrectly transfer the kill flag onto the
3834 // source register.
3835 unsigned ResultReg;
3836 if (SrcVT == MVT::i64) {
3837 uint64_t Mask = 0;
3838 switch (DestVT.SimpleTy) {
3839 default:
3840 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3841 return false;
3842 case MVT::i1:
3843 Mask = 0x1;
3844 break;
3845 case MVT::i8:
3846 Mask = 0xff;
3847 break;
3848 case MVT::i16:
3849 Mask = 0xffff;
3850 break;
3851 }
3852 // Issue an extract_subreg to get the lower 32 bits.
3853 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3854 AArch64::sub_32);
3855 // Create the AND instruction which performs the actual truncation.
3856 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3857 assert(ResultReg && "Unexpected AND instruction emission failure.");
3858 } else {
3859 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3860 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3861 TII.get(TargetOpcode::COPY), ResultReg)
3862 .addReg(SrcReg, getKillRegState(SrcIsKill));
3863 }
3864
3865 updateValueMap(I, ResultReg);
3866 return true;
3867 }
3868
3869 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3870 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3871 DestVT == MVT::i64) &&
3872 "Unexpected value type.");
3873 // Handle i8 and i16 as i32.
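// The W-register forms used below produce a 32-bit result directly; only a
// 64-bit destination needs the extra SUBREG_TO_REG step.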
3874 if (DestVT == MVT::i8 || DestVT == MVT::i16)
3875 DestVT = MVT::i32;
3876
3877 if (IsZExt) {
3878 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3879 assert(ResultReg && "Unexpected AND instruction emission failure.");
3880 if (DestVT == MVT::i64) {
3881 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3882 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
3883 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3884 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3885 TII.get(AArch64::SUBREG_TO_REG), Reg64)
3886 .addImm(0)
3887 .addReg(ResultReg)
3888 .addImm(AArch64::sub_32);
3889 ResultReg = Reg64;
3890 }
3891 return ResultReg;
3892 } else {
3893 if (DestVT == MVT::i64) {
3894 // FIXME: We're SExt i1 to i64.
3895 return 0;
3896 }
3897 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3898 /*TODO:IsKill=*/false, 0, 0);
3899 }
3900 }
3901
3902 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3903 unsigned Op1, bool Op1IsKill) {
3904 unsigned Opc, ZReg;
3905 switch (RetVT.SimpleTy) {
3906 default: return 0;
3907 case MVT::i8:
3908 case MVT::i16:
3909 case MVT::i32:
3910 RetVT = MVT::i32;
3911 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3912 case MVT::i64:
3913 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3914 }
3915
3916 const TargetRegisterClass *RC =
3917 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3918 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3919 ZReg, /*IsKill=*/true);
3920 }
3921
3922 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3923 unsigned Op1, bool Op1IsKill) {
3924 if (RetVT != MVT::i64)
3925 return 0;
3926
3927 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3928 Op0, Op0IsKill, Op1, Op1IsKill,
3929 AArch64::XZR, /*IsKill=*/true);
3930 }
3931
3932 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3933 unsigned Op1, bool Op1IsKill) {
3934 if (RetVT != MVT::i64)
3935 return 0;
3936
3937 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3938 Op0, Op0IsKill, Op1, Op1IsKill,
3939 AArch64::XZR, /*IsKill=*/true);
3940 }
3941
3942 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3943 unsigned Op1Reg, bool Op1IsKill) {
3944 unsigned Opc = 0;
3945 bool NeedTrunc = false;
3946 uint64_t Mask = 0;
3947 switch (RetVT.SimpleTy) {
3948 default: return 0;
3949 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
3950 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3951 case MVT::i32: Opc = AArch64::LSLVWr; break;
3952 case MVT::i64: Opc = AArch64::LSLVXr; break;
3953 }
3954
3955 const TargetRegisterClass *RC =
3956 (RetVT == MVT::i64) ?
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3957 if (NeedTrunc) { 3958 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 3959 Op1IsKill = true; 3960 } 3961 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 3962 Op1IsKill); 3963 if (NeedTrunc) 3964 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 3965 return ResultReg; 3966 } 3967 3968 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 3969 bool Op0IsKill, uint64_t Shift, 3970 bool IsZExt) { 3971 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 3972 "Unexpected source/return type pair."); 3973 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 3974 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 3975 "Unexpected source value type."); 3976 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 3977 RetVT == MVT::i64) && "Unexpected return value type."); 3978 3979 bool Is64Bit = (RetVT == MVT::i64); 3980 unsigned RegSize = Is64Bit ? 64 : 32; 3981 unsigned DstBits = RetVT.getSizeInBits(); 3982 unsigned SrcBits = SrcVT.getSizeInBits(); 3983 const TargetRegisterClass *RC = 3984 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3985 3986 // Just emit a copy for "zero" shifts. 3987 if (Shift == 0) { 3988 if (RetVT == SrcVT) { 3989 unsigned ResultReg = createResultReg(RC); 3990 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3991 TII.get(TargetOpcode::COPY), ResultReg) 3992 .addReg(Op0, getKillRegState(Op0IsKill)); 3993 return ResultReg; 3994 } else 3995 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 3996 } 3997 3998 // Don't deal with undefined shifts. 3999 if (Shift >= DstBits) 4000 return 0; 4001 4002 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4003 // {S|U}BFM Wd, Wn, #r, #s 4004 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4005 4006 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4007 // %2 = shl i16 %1, 4 4008 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4009 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4010 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4011 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4012 4013 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4014 // %2 = shl i16 %1, 8 4015 // Wd<32+7-24,32-24> = Wn<7:0> 4016 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4017 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4018 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4019 4020 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4021 // %2 = shl i16 %1, 12 4022 // Wd<32+3-20,32-20> = Wn<3:0> 4023 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4024 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4025 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4026 4027 unsigned ImmR = RegSize - Shift; 4028 // Limit the width to the length of the source type. 
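// With ImmR = RegSize - Shift and ImmS clamped to the source width, a single
// SBFM/UBFM performs both the extension and the left shift.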
4029 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4030 static const unsigned OpcTable[2][2] = { 4031 {AArch64::SBFMWri, AArch64::SBFMXri}, 4032 {AArch64::UBFMWri, AArch64::UBFMXri} 4033 }; 4034 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4035 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4036 unsigned TmpReg = MRI.createVirtualRegister(RC); 4037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4038 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4039 .addImm(0) 4040 .addReg(Op0, getKillRegState(Op0IsKill)) 4041 .addImm(AArch64::sub_32); 4042 Op0 = TmpReg; 4043 Op0IsKill = true; 4044 } 4045 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4046 } 4047 4048 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4049 unsigned Op1Reg, bool Op1IsKill) { 4050 unsigned Opc = 0; 4051 bool NeedTrunc = false; 4052 uint64_t Mask = 0; 4053 switch (RetVT.SimpleTy) { 4054 default: return 0; 4055 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4056 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4057 case MVT::i32: Opc = AArch64::LSRVWr; break; 4058 case MVT::i64: Opc = AArch64::LSRVXr; break; 4059 } 4060 4061 const TargetRegisterClass *RC = 4062 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4063 if (NeedTrunc) { 4064 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 4065 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4066 Op0IsKill = Op1IsKill = true; 4067 } 4068 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4069 Op1IsKill); 4070 if (NeedTrunc) 4071 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4072 return ResultReg; 4073 } 4074 4075 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4076 bool Op0IsKill, uint64_t Shift, 4077 bool IsZExt) { 4078 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4079 "Unexpected source/return type pair."); 4080 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4081 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4082 "Unexpected source value type."); 4083 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4084 RetVT == MVT::i64) && "Unexpected return value type."); 4085 4086 bool Is64Bit = (RetVT == MVT::i64); 4087 unsigned RegSize = Is64Bit ? 64 : 32; 4088 unsigned DstBits = RetVT.getSizeInBits(); 4089 unsigned SrcBits = SrcVT.getSizeInBits(); 4090 const TargetRegisterClass *RC = 4091 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4092 4093 // Just emit a copy for "zero" shifts. 4094 if (Shift == 0) { 4095 if (RetVT == SrcVT) { 4096 unsigned ResultReg = createResultReg(RC); 4097 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4098 TII.get(TargetOpcode::COPY), ResultReg) 4099 .addReg(Op0, getKillRegState(Op0IsKill)); 4100 return ResultReg; 4101 } else 4102 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4103 } 4104 4105 // Don't deal with undefined shifts. 4106 if (Shift >= DstBits) 4107 return 0; 4108 4109 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4110 // {S|U}BFM Wd, Wn, #r, #s 4111 // Wd<s-r:0> = Wn<s:r> when r <= s 4112 4113 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4114 // %2 = lshr i16 %1, 4 4115 // Wd<7-4:0> = Wn<7:4> 4116 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4117 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4118 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4119 4120 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4121 // %2 = lshr i16 %1, 8 4122 // Wd<7-7,0> = Wn<7:7> 4123 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4124 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4125 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4126 4127 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4128 // %2 = lshr i16 %1, 12 4129 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4130 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4131 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4132 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4133 4134 if (Shift >= SrcBits && IsZExt) 4135 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4136 4137 // It is not possible to fold a sign-extend into the LShr instruction. In this 4138 // case emit a sign-extend. 4139 if (!IsZExt) { 4140 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4141 if (!Op0) 4142 return 0; 4143 Op0IsKill = true; 4144 SrcVT = RetVT; 4145 SrcBits = SrcVT.getSizeInBits(); 4146 IsZExt = true; 4147 } 4148 4149 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4150 unsigned ImmS = SrcBits - 1; 4151 static const unsigned OpcTable[2][2] = { 4152 {AArch64::SBFMWri, AArch64::SBFMXri}, 4153 {AArch64::UBFMWri, AArch64::UBFMXri} 4154 }; 4155 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4156 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4157 unsigned TmpReg = MRI.createVirtualRegister(RC); 4158 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4159 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4160 .addImm(0) 4161 .addReg(Op0, getKillRegState(Op0IsKill)) 4162 .addImm(AArch64::sub_32); 4163 Op0 = TmpReg; 4164 Op0IsKill = true; 4165 } 4166 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4167 } 4168 4169 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4170 unsigned Op1Reg, bool Op1IsKill) { 4171 unsigned Opc = 0; 4172 bool NeedTrunc = false; 4173 uint64_t Mask = 0; 4174 switch (RetVT.SimpleTy) { 4175 default: return 0; 4176 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4177 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4178 case MVT::i32: Opc = AArch64::ASRVWr; break; 4179 case MVT::i64: Opc = AArch64::ASRVXr; break; 4180 } 4181 4182 const TargetRegisterClass *RC = 4183 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4184 if (NeedTrunc) { 4185 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); 4186 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4187 Op0IsKill = Op1IsKill = true; 4188 } 4189 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4190 Op1IsKill); 4191 if (NeedTrunc) 4192 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4193 return ResultReg; 4194 } 4195 4196 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4197 bool Op0IsKill, uint64_t Shift, 4198 bool IsZExt) { 4199 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4200 "Unexpected source/return type pair."); 4201 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4202 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4203 "Unexpected source value type."); 4204 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4205 RetVT == MVT::i64) && "Unexpected return value type."); 4206 4207 bool Is64Bit = (RetVT == MVT::i64); 4208 unsigned RegSize = Is64Bit ? 64 : 32; 4209 unsigned DstBits = RetVT.getSizeInBits(); 4210 unsigned SrcBits = SrcVT.getSizeInBits(); 4211 const TargetRegisterClass *RC = 4212 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4213 4214 // Just emit a copy for "zero" shifts. 4215 if (Shift == 0) { 4216 if (RetVT == SrcVT) { 4217 unsigned ResultReg = createResultReg(RC); 4218 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4219 TII.get(TargetOpcode::COPY), ResultReg) 4220 .addReg(Op0, getKillRegState(Op0IsKill)); 4221 return ResultReg; 4222 } else 4223 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4224 } 4225 4226 // Don't deal with undefined shifts. 4227 if (Shift >= DstBits) 4228 return 0; 4229 4230 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4231 // {S|U}BFM Wd, Wn, #r, #s 4232 // Wd<s-r:0> = Wn<s:r> when r <= s 4233 4234 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4235 // %2 = ashr i16 %1, 4 4236 // Wd<7-4:0> = Wn<7:4> 4237 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4238 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4239 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4240 4241 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4242 // %2 = ashr i16 %1, 8 4243 // Wd<7-7,0> = Wn<7:7> 4244 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4245 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4246 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4247 4248 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4249 // %2 = ashr i16 %1, 12 4250 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4251 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4252 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4253 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4254 4255 if (Shift >= SrcBits && IsZExt) 4256 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4257 4258 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4259 unsigned ImmS = SrcBits - 1; 4260 static const unsigned OpcTable[2][2] = { 4261 {AArch64::SBFMWri, AArch64::SBFMXri}, 4262 {AArch64::UBFMWri, AArch64::UBFMXri} 4263 }; 4264 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4265 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4266 unsigned TmpReg = MRI.createVirtualRegister(RC); 4267 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4268 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4269 .addImm(0) 4270 .addReg(Op0, getKillRegState(Op0IsKill)) 4271 .addImm(AArch64::sub_32); 4272 Op0 = TmpReg; 4273 Op0IsKill = true; 4274 } 4275 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4276 } 4277 4278 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4279 bool IsZExt) { 4280 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4281 4282 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4283 // DestVT are odd things, so test to make sure that they are both types we can 4284 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4285 // bail out to SelectionDAG. 4286 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4287 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4288 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4289 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4290 return 0; 4291 4292 unsigned Opc; 4293 unsigned Imm = 0; 4294 4295 switch (SrcVT.SimpleTy) { 4296 default: 4297 return 0; 4298 case MVT::i1: 4299 return emiti1Ext(SrcReg, DestVT, IsZExt); 4300 case MVT::i8: 4301 if (DestVT == MVT::i64) 4302 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4303 else 4304 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4305 Imm = 7; 4306 break; 4307 case MVT::i16: 4308 if (DestVT == MVT::i64) 4309 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4310 else 4311 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4312 Imm = 15; 4313 break; 4314 case MVT::i32: 4315 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4316 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4317 Imm = 31; 4318 break; 4319 } 4320 4321 // Handle i8 and i16 as i32. 
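// For a 64-bit destination the 32-bit source must first be placed in the
// low half of an X register via SUBREG_TO_REG, so that the X-form bitfield
// move can operate on it.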
4322 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4323 DestVT = MVT::i32; 4324 else if (DestVT == MVT::i64) { 4325 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4326 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4327 TII.get(AArch64::SUBREG_TO_REG), Src64) 4328 .addImm(0) 4329 .addReg(SrcReg) 4330 .addImm(AArch64::sub_32); 4331 SrcReg = Src64; 4332 } 4333 4334 const TargetRegisterClass *RC = 4335 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4336 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4337 } 4338 4339 static bool isZExtLoad(const MachineInstr *LI) { 4340 switch (LI->getOpcode()) { 4341 default: 4342 return false; 4343 case AArch64::LDURBBi: 4344 case AArch64::LDURHHi: 4345 case AArch64::LDURWi: 4346 case AArch64::LDRBBui: 4347 case AArch64::LDRHHui: 4348 case AArch64::LDRWui: 4349 case AArch64::LDRBBroX: 4350 case AArch64::LDRHHroX: 4351 case AArch64::LDRWroX: 4352 case AArch64::LDRBBroW: 4353 case AArch64::LDRHHroW: 4354 case AArch64::LDRWroW: 4355 return true; 4356 } 4357 } 4358 4359 static bool isSExtLoad(const MachineInstr *LI) { 4360 switch (LI->getOpcode()) { 4361 default: 4362 return false; 4363 case AArch64::LDURSBWi: 4364 case AArch64::LDURSHWi: 4365 case AArch64::LDURSBXi: 4366 case AArch64::LDURSHXi: 4367 case AArch64::LDURSWi: 4368 case AArch64::LDRSBWui: 4369 case AArch64::LDRSHWui: 4370 case AArch64::LDRSBXui: 4371 case AArch64::LDRSHXui: 4372 case AArch64::LDRSWui: 4373 case AArch64::LDRSBWroX: 4374 case AArch64::LDRSHWroX: 4375 case AArch64::LDRSBXroX: 4376 case AArch64::LDRSHXroX: 4377 case AArch64::LDRSWroX: 4378 case AArch64::LDRSBWroW: 4379 case AArch64::LDRSHWroW: 4380 case AArch64::LDRSBXroW: 4381 case AArch64::LDRSHXroW: 4382 case AArch64::LDRSWroW: 4383 return true; 4384 } 4385 } 4386 4387 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4388 MVT SrcVT) { 4389 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4390 if (!LI || !LI->hasOneUse()) 4391 return false; 4392 4393 // Check if the load instruction has already been selected. 4394 unsigned Reg = lookUpRegForValue(LI); 4395 if (!Reg) 4396 return false; 4397 4398 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4399 if (!MI) 4400 return false; 4401 4402 // Check if the correct load instruction has been emitted - SelectionDAG might 4403 // have emitted a zero-extending load, but we need a sign-extending load. 4404 bool IsZExt = isa<ZExtInst>(I); 4405 const auto *LoadMI = MI; 4406 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4407 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4408 unsigned LoadReg = MI->getOperand(1).getReg(); 4409 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4410 assert(LoadMI && "Expected valid instruction"); 4411 } 4412 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4413 return false; 4414 4415 // Nothing to be done. 
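// The sign-/zero-extending load already produced the value in the required
// width, so the extend instruction can simply reuse its register.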
4416 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4417 updateValueMap(I, Reg);
4418 return true;
4419 }
4420
4421 if (IsZExt) {
4422 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4423 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4424 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4425 .addImm(0)
4426 .addReg(Reg, getKillRegState(true))
4427 .addImm(AArch64::sub_32);
4428 Reg = Reg64;
4429 } else {
4430 assert((MI->getOpcode() == TargetOpcode::COPY &&
4431 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4432 "Expected copy instruction");
4433 Reg = MI->getOperand(1).getReg();
4434 MI->eraseFromParent();
4435 }
4436 updateValueMap(I, Reg);
4437 return true;
4438 }
4439
4440 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4441 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4442 "Unexpected integer extend instruction.");
4443 MVT RetVT;
4444 MVT SrcVT;
4445 if (!isTypeSupported(I->getType(), RetVT))
4446 return false;
4447
4448 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4449 return false;
4450
4451 // Try to optimize already sign-/zero-extended values from load instructions.
4452 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4453 return true;
4454
4455 unsigned SrcReg = getRegForValue(I->getOperand(0));
4456 if (!SrcReg)
4457 return false;
4458 bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4459
4460 // Try to optimize already sign-/zero-extended values from function arguments.
4461 bool IsZExt = isa<ZExtInst>(I);
4462 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4463 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4464 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4465 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4467 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4468 .addImm(0)
4469 .addReg(SrcReg, getKillRegState(SrcIsKill))
4470 .addImm(AArch64::sub_32);
4471 SrcReg = ResultReg;
4472 }
4473 // Conservatively clear all kill flags from all uses, because we are
4474 // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4475 // level. The result of the instruction at IR level might have been
4476 // trivially dead, which is no longer true.
4477 unsigned UseReg = lookUpRegForValue(I);
4478 if (UseReg)
4479 MRI.clearKillFlags(UseReg);
4480
4481 updateValueMap(I, SrcReg);
4482 return true;
4483 }
4484 }
4485
4486 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4487 if (!ResultReg)
4488 return false;
4489
4490 updateValueMap(I, ResultReg);
4491 return true;
4492 }
4493
4494 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4495 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4496 if (!DestEVT.isSimple())
4497 return false;
4498
4499 MVT DestVT = DestEVT.getSimpleVT();
4500 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4501 return false;
4502
4503 unsigned DivOpc;
4504 bool Is64bit = (DestVT == MVT::i64);
4505 switch (ISDOpcode) {
4506 default:
4507 return false;
4508 case ISD::SREM:
4509 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4510 break;
4511 case ISD::UREM:
4512 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4513 break;
4514 }
4515 unsigned MSubOpc = Is64bit ?
AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4516 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4517 if (!Src0Reg) 4518 return false; 4519 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4520 4521 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4522 if (!Src1Reg) 4523 return false; 4524 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4525 4526 const TargetRegisterClass *RC = 4527 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4528 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4529 Src1Reg, /*IsKill=*/false); 4530 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4531 // The remainder is computed as numerator - (quotient * denominator) using the 4532 // MSUB instruction. 4533 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4534 Src1Reg, Src1IsKill, Src0Reg, 4535 Src0IsKill); 4536 updateValueMap(I, ResultReg); 4537 return true; 4538 } 4539 4540 bool AArch64FastISel::selectMul(const Instruction *I) { 4541 MVT VT; 4542 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4543 return false; 4544 4545 if (VT.isVector()) 4546 return selectBinaryOp(I, ISD::MUL); 4547 4548 const Value *Src0 = I->getOperand(0); 4549 const Value *Src1 = I->getOperand(1); 4550 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4551 if (C->getValue().isPowerOf2()) 4552 std::swap(Src0, Src1); 4553 4554 // Try to simplify to a shift instruction. 4555 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4556 if (C->getValue().isPowerOf2()) { 4557 uint64_t ShiftVal = C->getValue().logBase2(); 4558 MVT SrcVT = VT; 4559 bool IsZExt = true; 4560 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4561 if (!isIntExtFree(ZExt)) { 4562 MVT VT; 4563 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4564 SrcVT = VT; 4565 IsZExt = true; 4566 Src0 = ZExt->getOperand(0); 4567 } 4568 } 4569 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4570 if (!isIntExtFree(SExt)) { 4571 MVT VT; 4572 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4573 SrcVT = VT; 4574 IsZExt = false; 4575 Src0 = SExt->getOperand(0); 4576 } 4577 } 4578 } 4579 4580 unsigned Src0Reg = getRegForValue(Src0); 4581 if (!Src0Reg) 4582 return false; 4583 bool Src0IsKill = hasTrivialKill(Src0); 4584 4585 unsigned ResultReg = 4586 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4587 4588 if (ResultReg) { 4589 updateValueMap(I, ResultReg); 4590 return true; 4591 } 4592 } 4593 4594 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4595 if (!Src0Reg) 4596 return false; 4597 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4598 4599 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4600 if (!Src1Reg) 4601 return false; 4602 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4603 4604 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4605 4606 if (!ResultReg) 4607 return false; 4608 4609 updateValueMap(I, ResultReg); 4610 return true; 4611 } 4612 4613 bool AArch64FastISel::selectShift(const Instruction *I) { 4614 MVT RetVT; 4615 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4616 return false; 4617 4618 if (RetVT.isVector()) 4619 return selectOperator(I, I->getOpcode()); 4620 4621 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4622 unsigned ResultReg = 0; 4623 uint64_t ShiftVal = C->getZExtValue(); 4624 MVT SrcVT = RetVT; 4625 bool IsZExt = I->getOpcode() != Instruction::AShr; 4626 const Value *Op0 = I->getOperand(0); 
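// If the shifted value is produced by a non-free zero-/sign-extend, fold
// that extension into the immediate shift (via the emit*_ri helpers below)
// instead of emitting it separately.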
4627 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4628 if (!isIntExtFree(ZExt)) { 4629 MVT TmpVT; 4630 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4631 SrcVT = TmpVT; 4632 IsZExt = true; 4633 Op0 = ZExt->getOperand(0); 4634 } 4635 } 4636 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4637 if (!isIntExtFree(SExt)) { 4638 MVT TmpVT; 4639 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4640 SrcVT = TmpVT; 4641 IsZExt = false; 4642 Op0 = SExt->getOperand(0); 4643 } 4644 } 4645 } 4646 4647 unsigned Op0Reg = getRegForValue(Op0); 4648 if (!Op0Reg) 4649 return false; 4650 bool Op0IsKill = hasTrivialKill(Op0); 4651 4652 switch (I->getOpcode()) { 4653 default: llvm_unreachable("Unexpected instruction."); 4654 case Instruction::Shl: 4655 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4656 break; 4657 case Instruction::AShr: 4658 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4659 break; 4660 case Instruction::LShr: 4661 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4662 break; 4663 } 4664 if (!ResultReg) 4665 return false; 4666 4667 updateValueMap(I, ResultReg); 4668 return true; 4669 } 4670 4671 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4672 if (!Op0Reg) 4673 return false; 4674 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4675 4676 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4677 if (!Op1Reg) 4678 return false; 4679 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4680 4681 unsigned ResultReg = 0; 4682 switch (I->getOpcode()) { 4683 default: llvm_unreachable("Unexpected instruction."); 4684 case Instruction::Shl: 4685 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4686 break; 4687 case Instruction::AShr: 4688 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4689 break; 4690 case Instruction::LShr: 4691 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4692 break; 4693 } 4694 4695 if (!ResultReg) 4696 return false; 4697 4698 updateValueMap(I, ResultReg); 4699 return true; 4700 } 4701 4702 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4703 MVT RetVT, SrcVT; 4704 4705 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4706 return false; 4707 if (!isTypeLegal(I->getType(), RetVT)) 4708 return false; 4709 4710 unsigned Opc; 4711 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4712 Opc = AArch64::FMOVWSr; 4713 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4714 Opc = AArch64::FMOVXDr; 4715 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4716 Opc = AArch64::FMOVSWr; 4717 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4718 Opc = AArch64::FMOVDXr; 4719 else 4720 return false; 4721 4722 const TargetRegisterClass *RC = nullptr; 4723 switch (RetVT.SimpleTy) { 4724 default: llvm_unreachable("Unexpected value type."); 4725 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4726 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4727 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4728 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4729 } 4730 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4731 if (!Op0Reg) 4732 return false; 4733 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4734 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4735 4736 if (!ResultReg) 4737 return false; 4738 4739 updateValueMap(I, ResultReg); 4740 return true; 4741 } 4742 4743 bool AArch64FastISel::selectFRem(const Instruction *I) { 4744 MVT RetVT; 
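// AArch64 has no floating-point remainder instruction, so frem is lowered
// to a fmodf/fmod libcall.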
4745 if (!isTypeLegal(I->getType(), RetVT)) 4746 return false; 4747 4748 RTLIB::Libcall LC; 4749 switch (RetVT.SimpleTy) { 4750 default: 4751 return false; 4752 case MVT::f32: 4753 LC = RTLIB::REM_F32; 4754 break; 4755 case MVT::f64: 4756 LC = RTLIB::REM_F64; 4757 break; 4758 } 4759 4760 ArgListTy Args; 4761 Args.reserve(I->getNumOperands()); 4762 4763 // Populate the argument list. 4764 for (auto &Arg : I->operands()) { 4765 ArgListEntry Entry; 4766 Entry.Val = Arg; 4767 Entry.Ty = Arg->getType(); 4768 Args.push_back(Entry); 4769 } 4770 4771 CallLoweringInfo CLI; 4772 MCContext &Ctx = MF->getContext(); 4773 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4774 TLI.getLibcallName(LC), std::move(Args)); 4775 if (!lowerCallTo(CLI)) 4776 return false; 4777 updateValueMap(I, CLI.ResultReg); 4778 return true; 4779 } 4780 4781 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4782 MVT VT; 4783 if (!isTypeLegal(I->getType(), VT)) 4784 return false; 4785 4786 if (!isa<ConstantInt>(I->getOperand(1))) 4787 return selectBinaryOp(I, ISD::SDIV); 4788 4789 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4790 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4791 !(C.isPowerOf2() || (-C).isPowerOf2())) 4792 return selectBinaryOp(I, ISD::SDIV); 4793 4794 unsigned Lg2 = C.countTrailingZeros(); 4795 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4796 if (!Src0Reg) 4797 return false; 4798 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4799 4800 if (cast<BinaryOperator>(I)->isExact()) { 4801 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4802 if (!ResultReg) 4803 return false; 4804 updateValueMap(I, ResultReg); 4805 return true; 4806 } 4807 4808 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4809 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4810 if (!AddReg) 4811 return false; 4812 4813 // (Src0 < 0) ? Pow2 - 1 : 0; 4814 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4815 return false; 4816 4817 unsigned SelectOpc; 4818 const TargetRegisterClass *RC; 4819 if (VT == MVT::i64) { 4820 SelectOpc = AArch64::CSELXr; 4821 RC = &AArch64::GPR64RegClass; 4822 } else { 4823 SelectOpc = AArch64::CSELWr; 4824 RC = &AArch64::GPR32RegClass; 4825 } 4826 unsigned SelectReg = 4827 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4828 Src0IsKill, AArch64CC::LT); 4829 if (!SelectReg) 4830 return false; 4831 4832 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4833 // negate the result. 4834 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4835 unsigned ResultReg; 4836 if (C.isNegative()) 4837 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4838 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4839 else 4840 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4841 4842 if (!ResultReg) 4843 return false; 4844 4845 updateValueMap(I, ResultReg); 4846 return true; 4847 } 4848 4849 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4850 /// have to duplicate it for AArch64, because otherwise we would fail during the 4851 /// sign-extend emission. 4852 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4853 unsigned IdxN = getRegForValue(Idx); 4854 if (IdxN == 0) 4855 // Unhandled operand. Halt "fast" selection and bail. 
4856 return std::pair<unsigned, bool>(0, false);
4857
4858 bool IdxNIsKill = hasTrivialKill(Idx);
4859
4860 // If the index is smaller or larger than intptr_t, truncate or extend it.
4861 MVT PtrVT = TLI.getPointerTy(DL);
4862 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4863 if (IdxVT.bitsLT(PtrVT)) {
4864 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4865 IdxNIsKill = true;
4866 } else if (IdxVT.bitsGT(PtrVT))
4867 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4868 return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4869 }
4870
4871 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4872 /// duplicate it for AArch64, because otherwise we would bail out even for
4873 /// simple cases. This is because the standard fastEmit functions don't cover
4874 /// MUL at all and ADD is lowered very inefficiently.
4875 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4876 unsigned N = getRegForValue(I->getOperand(0));
4877 if (!N)
4878 return false;
4879 bool NIsKill = hasTrivialKill(I->getOperand(0));
4880
4881 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4882 // into a single N = N + TotalOffset.
4883 uint64_t TotalOffs = 0;
4884 MVT VT = TLI.getPointerTy(DL);
4885 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4886 GTI != E; ++GTI) {
4887 const Value *Idx = GTI.getOperand();
4888 if (auto *StTy = GTI.getStructTypeOrNull()) {
4889 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4890 // N = N + Offset
4891 if (Field)
4892 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4893 } else {
4894 Type *Ty = GTI.getIndexedType();
4895
4896 // If this is a constant subscript, handle it quickly.
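// Constant indices are accumulated into TotalOffs; only a variable index
// forces emission of the scaled multiply-add sequence further below.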
4897 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4898 if (CI->isZero()) 4899 continue; 4900 // N = N + Offset 4901 TotalOffs += 4902 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 4903 continue; 4904 } 4905 if (TotalOffs) { 4906 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4907 if (!N) 4908 return false; 4909 NIsKill = true; 4910 TotalOffs = 0; 4911 } 4912 4913 // N = N + Idx * ElementSize; 4914 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 4915 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); 4916 unsigned IdxN = Pair.first; 4917 bool IdxNIsKill = Pair.second; 4918 if (!IdxN) 4919 return false; 4920 4921 if (ElementSize != 1) { 4922 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 4923 if (!C) 4924 return false; 4925 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); 4926 if (!IdxN) 4927 return false; 4928 IdxNIsKill = true; 4929 } 4930 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); 4931 if (!N) 4932 return false; 4933 } 4934 } 4935 if (TotalOffs) { 4936 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4937 if (!N) 4938 return false; 4939 } 4940 updateValueMap(I, N); 4941 return true; 4942 } 4943 4944 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 4945 assert(TM.getOptLevel() == CodeGenOpt::None && 4946 "cmpxchg survived AtomicExpand at optlevel > -O0"); 4947 4948 auto *RetPairTy = cast<StructType>(I->getType()); 4949 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 4950 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 4951 "cmpxchg has a non-i1 status result"); 4952 4953 MVT VT; 4954 if (!isTypeLegal(RetTy, VT)) 4955 return false; 4956 4957 const TargetRegisterClass *ResRC; 4958 unsigned Opc, CmpOpc; 4959 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 4960 // extractvalue selection doesn't support that. 4961 if (VT == MVT::i32) { 4962 Opc = AArch64::CMP_SWAP_32; 4963 CmpOpc = AArch64::SUBSWrs; 4964 ResRC = &AArch64::GPR32RegClass; 4965 } else if (VT == MVT::i64) { 4966 Opc = AArch64::CMP_SWAP_64; 4967 CmpOpc = AArch64::SUBSXrs; 4968 ResRC = &AArch64::GPR64RegClass; 4969 } else { 4970 return false; 4971 } 4972 4973 const MCInstrDesc &II = TII.get(Opc); 4974 4975 const unsigned AddrReg = constrainOperandRegClass( 4976 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 4977 const unsigned DesiredReg = constrainOperandRegClass( 4978 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 4979 const unsigned NewReg = constrainOperandRegClass( 4980 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 4981 4982 const unsigned ResultReg1 = createResultReg(ResRC); 4983 const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 4984 const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass); 4985 4986 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 4987 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 4988 .addDef(ResultReg1) 4989 .addDef(ScratchReg) 4990 .addUse(AddrReg) 4991 .addUse(DesiredReg) 4992 .addUse(NewReg); 4993 4994 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) 4995 .addDef(VT == MVT::i32 ? 
AArch64::WZR : AArch64::XZR) 4996 .addUse(ResultReg1) 4997 .addUse(DesiredReg) 4998 .addImm(0); 4999 5000 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) 5001 .addDef(ResultReg2) 5002 .addUse(AArch64::WZR) 5003 .addUse(AArch64::WZR) 5004 .addImm(AArch64CC::NE); 5005 5006 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5007 updateValueMap(I, ResultReg1, 2); 5008 return true; 5009 } 5010 5011 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5012 switch (I->getOpcode()) { 5013 default: 5014 break; 5015 case Instruction::Add: 5016 case Instruction::Sub: 5017 return selectAddSub(I); 5018 case Instruction::Mul: 5019 return selectMul(I); 5020 case Instruction::SDiv: 5021 return selectSDiv(I); 5022 case Instruction::SRem: 5023 if (!selectBinaryOp(I, ISD::SREM)) 5024 return selectRem(I, ISD::SREM); 5025 return true; 5026 case Instruction::URem: 5027 if (!selectBinaryOp(I, ISD::UREM)) 5028 return selectRem(I, ISD::UREM); 5029 return true; 5030 case Instruction::Shl: 5031 case Instruction::LShr: 5032 case Instruction::AShr: 5033 return selectShift(I); 5034 case Instruction::And: 5035 case Instruction::Or: 5036 case Instruction::Xor: 5037 return selectLogicalOp(I); 5038 case Instruction::Br: 5039 return selectBranch(I); 5040 case Instruction::IndirectBr: 5041 return selectIndirectBr(I); 5042 case Instruction::BitCast: 5043 if (!FastISel::selectBitCast(I)) 5044 return selectBitCast(I); 5045 return true; 5046 case Instruction::FPToSI: 5047 if (!selectCast(I, ISD::FP_TO_SINT)) 5048 return selectFPToInt(I, /*Signed=*/true); 5049 return true; 5050 case Instruction::FPToUI: 5051 return selectFPToInt(I, /*Signed=*/false); 5052 case Instruction::ZExt: 5053 case Instruction::SExt: 5054 return selectIntExt(I); 5055 case Instruction::Trunc: 5056 if (!selectCast(I, ISD::TRUNCATE)) 5057 return selectTrunc(I); 5058 return true; 5059 case Instruction::FPExt: 5060 return selectFPExt(I); 5061 case Instruction::FPTrunc: 5062 return selectFPTrunc(I); 5063 case Instruction::SIToFP: 5064 if (!selectCast(I, ISD::SINT_TO_FP)) 5065 return selectIntToFP(I, /*Signed=*/true); 5066 return true; 5067 case Instruction::UIToFP: 5068 return selectIntToFP(I, /*Signed=*/false); 5069 case Instruction::Load: 5070 return selectLoad(I); 5071 case Instruction::Store: 5072 return selectStore(I); 5073 case Instruction::FCmp: 5074 case Instruction::ICmp: 5075 return selectCmp(I); 5076 case Instruction::Select: 5077 return selectSelect(I); 5078 case Instruction::Ret: 5079 return selectRet(I); 5080 case Instruction::FRem: 5081 return selectFRem(I); 5082 case Instruction::GetElementPtr: 5083 return selectGetElementPtr(I); 5084 case Instruction::AtomicCmpXchg: 5085 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5086 } 5087 5088 // fall-back to target-independent instruction selection. 5089 return selectOperator(I, I->getOpcode()); 5090 // Silence warnings. 5091 (void)&CC_AArch64_DarwinPCS_VarArg; 5092 } 5093 5094 namespace llvm { 5095 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5096 const TargetLibraryInfo *LibInfo) { 5097 return new AArch64FastISel(FuncInfo, LibInfo); 5098 } 5099 } 5100