1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the AArch64-specific support for the FastISel class. Some 11 // of the target-specific code is generated by tablegen in the file 12 // AArch64GenFastISel.inc, which is #included here. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AArch64.h" 17 #include "AArch64CallingConvention.h" 18 #include "AArch64Subtarget.h" 19 #include "AArch64TargetMachine.h" 20 #include "MCTargetDesc/AArch64AddressingModes.h" 21 #include "llvm/Analysis/BranchProbabilityInfo.h" 22 #include "llvm/CodeGen/CallingConvLower.h" 23 #include "llvm/CodeGen/FastISel.h" 24 #include "llvm/CodeGen/FunctionLoweringInfo.h" 25 #include "llvm/CodeGen/MachineConstantPool.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/IR/CallingConv.h" 30 #include "llvm/IR/DataLayout.h" 31 #include "llvm/IR/DerivedTypes.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/IR/GetElementPtrTypeIterator.h" 34 #include "llvm/IR/GlobalAlias.h" 35 #include "llvm/IR/GlobalVariable.h" 36 #include "llvm/IR/Instructions.h" 37 #include "llvm/IR/IntrinsicInst.h" 38 #include "llvm/IR/Operator.h" 39 #include "llvm/Support/CommandLine.h" 40 using namespace llvm; 41 42 namespace { 43 44 class AArch64FastISel final : public FastISel { 45 class Address { 46 public: 47 typedef enum { 48 RegBase, 49 FrameIndexBase 50 } BaseKind; 51 52 private: 53 BaseKind Kind; 54 AArch64_AM::ShiftExtendType ExtType; 55 union { 56 unsigned Reg; 57 int FI; 58 } Base; 59 unsigned OffsetReg; 60 unsigned Shift; 61 int64_t Offset; 62 const GlobalValue *GV; 63 64 public: 65 Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend), 66 OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; } 67 void setKind(BaseKind K) { Kind = K; } 68 BaseKind getKind() const { return Kind; } 69 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } 70 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } 71 bool isRegBase() const { return Kind == RegBase; } 72 bool isFIBase() const { return Kind == FrameIndexBase; } 73 void setReg(unsigned Reg) { 74 assert(isRegBase() && "Invalid base register access!"); 75 Base.Reg = Reg; 76 } 77 unsigned getReg() const { 78 assert(isRegBase() && "Invalid base register access!"); 79 return Base.Reg; 80 } 81 void setOffsetReg(unsigned Reg) { 82 OffsetReg = Reg; 83 } 84 unsigned getOffsetReg() const { 85 return OffsetReg; 86 } 87 void setFI(unsigned FI) { 88 assert(isFIBase() && "Invalid base frame index access!"); 89 Base.FI = FI; 90 } 91 unsigned getFI() const { 92 assert(isFIBase() && "Invalid base frame index access!"); 93 return Base.FI; 94 } 95 void setOffset(int64_t O) { Offset = O; } 96 int64_t getOffset() { return Offset; } 97 void setShift(unsigned S) { Shift = S; } 98 unsigned getShift() { return Shift; } 99 100 void setGlobalValue(const GlobalValue *G) { GV = G; } 101 const GlobalValue *getGlobalValue() { return GV; } 102 }; 103 104 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 105 /// make the right decision when generating code for different targets. 106 const AArch64Subtarget *Subtarget; 107 LLVMContext *Context; 108 109 bool fastLowerArguments() override; 110 bool fastLowerCall(CallLoweringInfo &CLI) override; 111 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 112 113 private: 114 // Selection routines. 115 bool selectAddSub(const Instruction *I); 116 bool selectLogicalOp(const Instruction *I); 117 bool selectLoad(const Instruction *I); 118 bool selectStore(const Instruction *I); 119 bool selectBranch(const Instruction *I); 120 bool selectIndirectBr(const Instruction *I); 121 bool selectCmp(const Instruction *I); 122 bool selectSelect(const Instruction *I); 123 bool selectFPExt(const Instruction *I); 124 bool selectFPTrunc(const Instruction *I); 125 bool selectFPToInt(const Instruction *I, bool Signed); 126 bool selectIntToFP(const Instruction *I, bool Signed); 127 bool selectRem(const Instruction *I, unsigned ISDOpcode); 128 bool selectRet(const Instruction *I); 129 bool selectTrunc(const Instruction *I); 130 bool selectIntExt(const Instruction *I); 131 bool selectMul(const Instruction *I); 132 bool selectShift(const Instruction *I); 133 bool selectBitCast(const Instruction *I); 134 bool selectFRem(const Instruction *I); 135 bool selectSDiv(const Instruction *I); 136 bool selectGetElementPtr(const Instruction *I); 137 138 // Utility helper routines. 139 bool isTypeLegal(Type *Ty, MVT &VT); 140 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); 141 bool isValueAvailable(const Value *V) const; 142 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); 143 bool computeCallAddress(const Value *V, Address &Addr); 144 bool simplifyAddress(Address &Addr, MVT VT); 145 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, 146 unsigned Flags, unsigned ScaleFactor, 147 MachineMemOperand *MMO); 148 bool isMemCpySmall(uint64_t Len, unsigned Alignment); 149 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 150 unsigned Alignment); 151 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, 152 const Value *Cond); 153 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); 154 bool optimizeSelect(const SelectInst *SI); 155 std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); 156 157 // Emit helper routines. 158 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 159 const Value *RHS, bool SetFlags = false, 160 bool WantResult = true, bool IsZExt = false); 161 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 162 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 163 bool SetFlags = false, bool WantResult = true); 164 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 165 bool LHSIsKill, uint64_t Imm, bool SetFlags = false, 166 bool WantResult = true); 167 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 168 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 169 AArch64_AM::ShiftExtendType ShiftType, 170 uint64_t ShiftImm, bool SetFlags = false, 171 bool WantResult = true); 172 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 173 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 174 AArch64_AM::ShiftExtendType ExtType, 175 uint64_t ShiftImm, bool SetFlags = false, 176 bool WantResult = true); 177 178 // Emit functions. 179 bool emitCompareAndBranch(const BranchInst *BI); 180 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); 181 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); 182 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); 183 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); 184 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, 185 MachineMemOperand *MMO = nullptr); 186 bool emitStore(MVT VT, unsigned SrcReg, Address Addr, 187 MachineMemOperand *MMO = nullptr); 188 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 189 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); 190 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 191 bool SetFlags = false, bool WantResult = true, 192 bool IsZExt = false); 193 unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm); 194 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 195 bool SetFlags = false, bool WantResult = true, 196 bool IsZExt = false); 197 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 198 unsigned RHSReg, bool RHSIsKill, bool WantResult = true); 199 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 200 unsigned RHSReg, bool RHSIsKill, 201 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, 202 bool WantResult = true); 203 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, 204 const Value *RHS); 205 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 206 bool LHSIsKill, uint64_t Imm); 207 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 208 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 209 uint64_t ShiftImm); 210 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); 211 unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 212 unsigned Op1, bool Op1IsKill); 213 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 214 unsigned Op1, bool Op1IsKill); 215 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 216 unsigned Op1, bool Op1IsKill); 217 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 218 unsigned Op1Reg, bool Op1IsKill); 219 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 220 uint64_t Imm, bool IsZExt = true); 221 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 222 unsigned Op1Reg, bool Op1IsKill); 223 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 224 uint64_t Imm, bool IsZExt = true); 225 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 226 unsigned Op1Reg, bool Op1IsKill); 227 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 228 uint64_t Imm, bool IsZExt = false); 229 230 unsigned materializeInt(const ConstantInt *CI, MVT VT); 231 unsigned materializeFP(const ConstantFP *CFP, MVT VT); 232 unsigned materializeGV(const GlobalValue *GV); 233 234 // Call handling routines. 235 private: 236 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; 237 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, 238 unsigned &NumBytes); 239 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); 240 241 public: 242 // Backend specific FastISel code. 243 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 244 unsigned fastMaterializeConstant(const Constant *C) override; 245 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 246 247 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 248 const TargetLibraryInfo *LibInfo) 249 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 250 Subtarget = 251 &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); 252 Context = &FuncInfo.Fn->getContext(); 253 } 254 255 bool fastSelectInstruction(const Instruction *I) override; 256 257 #include "AArch64GenFastISel.inc" 258 }; 259 260 } // end anonymous namespace 261 262 #include "AArch64GenCallingConv.inc" 263 264 /// \brief Check if the sign-/zero-extend will be a noop. 265 static bool isIntExtFree(const Instruction *I) { 266 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 267 "Unexpected integer extend instruction."); 268 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && 269 "Unexpected value type."); 270 bool IsZExt = isa<ZExtInst>(I); 271 272 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) 273 if (LI->hasOneUse()) 274 return true; 275 276 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) 277 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) 278 return true; 279 280 return false; 281 } 282 283 /// \brief Determine the implicit scale factor that is applied by a memory 284 /// operation for a given value type. 285 static unsigned getImplicitScaleFactor(MVT VT) { 286 switch (VT.SimpleTy) { 287 default: 288 return 0; // invalid 289 case MVT::i1: // fall-through 290 case MVT::i8: 291 return 1; 292 case MVT::i16: 293 return 2; 294 case MVT::i32: // fall-through 295 case MVT::f32: 296 return 4; 297 case MVT::i64: // fall-through 298 case MVT::f64: 299 return 8; 300 } 301 } 302 303 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 304 if (CC == CallingConv::WebKit_JS) 305 return CC_AArch64_WebKit_JS; 306 if (CC == CallingConv::GHC) 307 return CC_AArch64_GHC; 308 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 309 } 310 311 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 312 assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && 313 "Alloca should always return a pointer."); 314 315 // Don't handle dynamic allocas. 316 if (!FuncInfo.StaticAllocaMap.count(AI)) 317 return 0; 318 319 DenseMap<const AllocaInst *, int>::iterator SI = 320 FuncInfo.StaticAllocaMap.find(AI); 321 322 if (SI != FuncInfo.StaticAllocaMap.end()) { 323 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 324 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 325 ResultReg) 326 .addFrameIndex(SI->second) 327 .addImm(0) 328 .addImm(0); 329 return ResultReg; 330 } 331 332 return 0; 333 } 334 335 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 336 if (VT > MVT::i64) 337 return 0; 338 339 if (!CI->isZero()) 340 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 341 342 // Create a copy from the zero register to materialize a "0" value. 343 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 344 : &AArch64::GPR32RegClass; 345 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 346 unsigned ResultReg = createResultReg(RC); 347 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 348 ResultReg).addReg(ZeroReg, getKillRegState(true)); 349 return ResultReg; 350 } 351 352 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 353 // Positive zero (+0.0) has to be materialized with a fmov from the zero 354 // register, because the immediate version of fmov cannot encode zero. 355 if (CFP->isNullValue()) 356 return fastMaterializeFloatZero(CFP); 357 358 if (VT != MVT::f32 && VT != MVT::f64) 359 return 0; 360 361 const APFloat Val = CFP->getValueAPF(); 362 bool Is64Bit = (VT == MVT::f64); 363 // This checks to see if we can use FMOV instructions to materialize 364 // a constant, otherwise we have to materialize via the constant pool. 365 if (TLI.isFPImmLegal(Val, VT)) { 366 int Imm = 367 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 368 assert((Imm != -1) && "Cannot encode floating-point constant."); 369 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 370 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 371 } 372 373 // For the MachO large code model materialize the FP constant in code. 374 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { 375 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 376 const TargetRegisterClass *RC = Is64Bit ? 377 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 378 379 unsigned TmpReg = createResultReg(RC); 380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) 381 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 382 383 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 384 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 385 TII.get(TargetOpcode::COPY), ResultReg) 386 .addReg(TmpReg, getKillRegState(true)); 387 388 return ResultReg; 389 } 390 391 // Materialize via constant pool. MachineConstantPool wants an explicit 392 // alignment. 393 unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); 394 if (Align == 0) 395 Align = DL.getTypeAllocSize(CFP->getType()); 396 397 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 398 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 399 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 400 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 401 402 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 403 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 404 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 405 .addReg(ADRPReg) 406 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 407 return ResultReg; 408 } 409 410 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 411 // We can't handle thread-local variables quickly yet. 412 if (GV->isThreadLocal()) 413 return 0; 414 415 // MachO still uses GOT for large code-model accesses, but ELF requires 416 // movz/movk sequences, which FastISel doesn't handle yet. 417 if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO()) 418 return 0; 419 420 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 421 422 EVT DestEVT = TLI.getValueType(GV->getType(), true); 423 if (!DestEVT.isSimple()) 424 return 0; 425 426 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 427 unsigned ResultReg; 428 429 if (OpFlags & AArch64II::MO_GOT) { 430 // ADRP + LDRX 431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 432 ADRPReg) 433 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE); 434 435 ResultReg = createResultReg(&AArch64::GPR64RegClass); 436 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), 437 ResultReg) 438 .addReg(ADRPReg) 439 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 440 AArch64II::MO_NC); 441 } else if (OpFlags & AArch64II::MO_CONSTPOOL) { 442 // We can't handle addresses loaded from a constant pool quickly yet. 443 return 0; 444 } else { 445 // ADRP + ADDX 446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 447 ADRPReg) 448 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE); 449 450 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 451 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 452 ResultReg) 453 .addReg(ADRPReg) 454 .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) 455 .addImm(0); 456 } 457 return ResultReg; 458 } 459 460 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 461 EVT CEVT = TLI.getValueType(C->getType(), true); 462 463 // Only handle simple types. 464 if (!CEVT.isSimple()) 465 return 0; 466 MVT VT = CEVT.getSimpleVT(); 467 468 if (const auto *CI = dyn_cast<ConstantInt>(C)) 469 return materializeInt(CI, VT); 470 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 471 return materializeFP(CFP, VT); 472 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 473 return materializeGV(GV); 474 475 return 0; 476 } 477 478 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 479 assert(CFP->isNullValue() && 480 "Floating-point constant is not a positive zero."); 481 MVT VT; 482 if (!isTypeLegal(CFP->getType(), VT)) 483 return 0; 484 485 if (VT != MVT::f32 && VT != MVT::f64) 486 return 0; 487 488 bool Is64Bit = (VT == MVT::f64); 489 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 490 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 491 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); 492 } 493 494 /// \brief Check if the multiply is by a power-of-2 constant. 495 static bool isMulPowOf2(const Value *I) { 496 if (const auto *MI = dyn_cast<MulOperator>(I)) { 497 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 498 if (C->getValue().isPowerOf2()) 499 return true; 500 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 501 if (C->getValue().isPowerOf2()) 502 return true; 503 } 504 return false; 505 } 506 507 // Computes the address to get to an object. 508 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 509 { 510 const User *U = nullptr; 511 unsigned Opcode = Instruction::UserOp1; 512 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 513 // Don't walk into other basic blocks unless the object is an alloca from 514 // another block, otherwise it may not have a virtual register assigned. 515 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 516 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 517 Opcode = I->getOpcode(); 518 U = I; 519 } 520 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 521 Opcode = C->getOpcode(); 522 U = C; 523 } 524 525 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 526 if (Ty->getAddressSpace() > 255) 527 // Fast instruction selection doesn't support the special 528 // address spaces. 529 return false; 530 531 switch (Opcode) { 532 default: 533 break; 534 case Instruction::BitCast: { 535 // Look through bitcasts. 536 return computeAddress(U->getOperand(0), Addr, Ty); 537 } 538 case Instruction::IntToPtr: { 539 // Look past no-op inttoptrs. 540 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 541 return computeAddress(U->getOperand(0), Addr, Ty); 542 break; 543 } 544 case Instruction::PtrToInt: { 545 // Look past no-op ptrtoints. 546 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 547 return computeAddress(U->getOperand(0), Addr, Ty); 548 break; 549 } 550 case Instruction::GetElementPtr: { 551 Address SavedAddr = Addr; 552 uint64_t TmpOffset = Addr.getOffset(); 553 554 // Iterate through the GEP folding the constants into offsets where 555 // we can. 556 gep_type_iterator GTI = gep_type_begin(U); 557 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; 558 ++i, ++GTI) { 559 const Value *Op = *i; 560 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 561 const StructLayout *SL = DL.getStructLayout(STy); 562 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 563 TmpOffset += SL->getElementOffset(Idx); 564 } else { 565 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 566 for (;;) { 567 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 568 // Constant-offset addressing. 569 TmpOffset += CI->getSExtValue() * S; 570 break; 571 } 572 if (canFoldAddIntoGEP(U, Op)) { 573 // A compatible add with a constant operand. Fold the constant. 574 ConstantInt *CI = 575 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 576 TmpOffset += CI->getSExtValue() * S; 577 // Iterate on the other operand. 578 Op = cast<AddOperator>(Op)->getOperand(0); 579 continue; 580 } 581 // Unsupported 582 goto unsupported_gep; 583 } 584 } 585 } 586 587 // Try to grab the base operand now. 588 Addr.setOffset(TmpOffset); 589 if (computeAddress(U->getOperand(0), Addr, Ty)) 590 return true; 591 592 // We failed, restore everything and try the other options. 593 Addr = SavedAddr; 594 595 unsupported_gep: 596 break; 597 } 598 case Instruction::Alloca: { 599 const AllocaInst *AI = cast<AllocaInst>(Obj); 600 DenseMap<const AllocaInst *, int>::iterator SI = 601 FuncInfo.StaticAllocaMap.find(AI); 602 if (SI != FuncInfo.StaticAllocaMap.end()) { 603 Addr.setKind(Address::FrameIndexBase); 604 Addr.setFI(SI->second); 605 return true; 606 } 607 break; 608 } 609 case Instruction::Add: { 610 // Adds of constants are common and easy enough. 611 const Value *LHS = U->getOperand(0); 612 const Value *RHS = U->getOperand(1); 613 614 if (isa<ConstantInt>(LHS)) 615 std::swap(LHS, RHS); 616 617 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 618 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 619 return computeAddress(LHS, Addr, Ty); 620 } 621 622 Address Backup = Addr; 623 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 624 return true; 625 Addr = Backup; 626 627 break; 628 } 629 case Instruction::Sub: { 630 // Subs of constants are common and easy enough. 631 const Value *LHS = U->getOperand(0); 632 const Value *RHS = U->getOperand(1); 633 634 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 635 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 636 return computeAddress(LHS, Addr, Ty); 637 } 638 break; 639 } 640 case Instruction::Shl: { 641 if (Addr.getOffsetReg()) 642 break; 643 644 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 645 if (!CI) 646 break; 647 648 unsigned Val = CI->getZExtValue(); 649 if (Val < 1 || Val > 3) 650 break; 651 652 uint64_t NumBytes = 0; 653 if (Ty && Ty->isSized()) { 654 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 655 NumBytes = NumBits / 8; 656 if (!isPowerOf2_64(NumBits)) 657 NumBytes = 0; 658 } 659 660 if (NumBytes != (1ULL << Val)) 661 break; 662 663 Addr.setShift(Val); 664 Addr.setExtendType(AArch64_AM::LSL); 665 666 const Value *Src = U->getOperand(0); 667 if (const auto *I = dyn_cast<Instruction>(Src)) 668 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) 669 Src = I; 670 671 // Fold the zext or sext when it won't become a noop. 672 if (const auto *ZE = dyn_cast<ZExtInst>(Src)) { 673 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 674 Addr.setExtendType(AArch64_AM::UXTW); 675 Src = ZE->getOperand(0); 676 } 677 } else if (const auto *SE = dyn_cast<SExtInst>(Src)) { 678 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 679 Addr.setExtendType(AArch64_AM::SXTW); 680 Src = SE->getOperand(0); 681 } 682 } 683 684 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 685 if (AI->getOpcode() == Instruction::And) { 686 const Value *LHS = AI->getOperand(0); 687 const Value *RHS = AI->getOperand(1); 688 689 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 690 if (C->getValue() == 0xffffffff) 691 std::swap(LHS, RHS); 692 693 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 694 if (C->getValue() == 0xffffffff) { 695 Addr.setExtendType(AArch64_AM::UXTW); 696 unsigned Reg = getRegForValue(LHS); 697 if (!Reg) 698 return false; 699 bool RegIsKill = hasTrivialKill(LHS); 700 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 701 AArch64::sub_32); 702 Addr.setOffsetReg(Reg); 703 return true; 704 } 705 } 706 707 unsigned Reg = getRegForValue(Src); 708 if (!Reg) 709 return false; 710 Addr.setOffsetReg(Reg); 711 return true; 712 } 713 case Instruction::Mul: { 714 if (Addr.getOffsetReg()) 715 break; 716 717 if (!isMulPowOf2(U)) 718 break; 719 720 const Value *LHS = U->getOperand(0); 721 const Value *RHS = U->getOperand(1); 722 723 // Canonicalize power-of-2 value to the RHS. 724 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 725 if (C->getValue().isPowerOf2()) 726 std::swap(LHS, RHS); 727 728 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); 729 const auto *C = cast<ConstantInt>(RHS); 730 unsigned Val = C->getValue().logBase2(); 731 if (Val < 1 || Val > 3) 732 break; 733 734 uint64_t NumBytes = 0; 735 if (Ty && Ty->isSized()) { 736 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 737 NumBytes = NumBits / 8; 738 if (!isPowerOf2_64(NumBits)) 739 NumBytes = 0; 740 } 741 742 if (NumBytes != (1ULL << Val)) 743 break; 744 745 Addr.setShift(Val); 746 Addr.setExtendType(AArch64_AM::LSL); 747 748 const Value *Src = LHS; 749 if (const auto *I = dyn_cast<Instruction>(Src)) 750 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) 751 Src = I; 752 753 754 // Fold the zext or sext when it won't become a noop. 755 if (const auto *ZE = dyn_cast<ZExtInst>(Src)) { 756 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 757 Addr.setExtendType(AArch64_AM::UXTW); 758 Src = ZE->getOperand(0); 759 } 760 } else if (const auto *SE = dyn_cast<SExtInst>(Src)) { 761 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 762 Addr.setExtendType(AArch64_AM::SXTW); 763 Src = SE->getOperand(0); 764 } 765 } 766 767 unsigned Reg = getRegForValue(Src); 768 if (!Reg) 769 return false; 770 Addr.setOffsetReg(Reg); 771 return true; 772 } 773 case Instruction::And: { 774 if (Addr.getOffsetReg()) 775 break; 776 777 if (!Ty || DL.getTypeSizeInBits(Ty) != 8) 778 break; 779 780 const Value *LHS = U->getOperand(0); 781 const Value *RHS = U->getOperand(1); 782 783 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 784 if (C->getValue() == 0xffffffff) 785 std::swap(LHS, RHS); 786 787 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 788 if (C->getValue() == 0xffffffff) { 789 Addr.setShift(0); 790 Addr.setExtendType(AArch64_AM::LSL); 791 Addr.setExtendType(AArch64_AM::UXTW); 792 793 unsigned Reg = getRegForValue(LHS); 794 if (!Reg) 795 return false; 796 bool RegIsKill = hasTrivialKill(LHS); 797 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 798 AArch64::sub_32); 799 Addr.setOffsetReg(Reg); 800 return true; 801 } 802 break; 803 } 804 case Instruction::SExt: 805 case Instruction::ZExt: { 806 if (!Addr.getReg() || Addr.getOffsetReg()) 807 break; 808 809 const Value *Src = nullptr; 810 // Fold the zext or sext when it won't become a noop. 811 if (const auto *ZE = dyn_cast<ZExtInst>(U)) { 812 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 813 Addr.setExtendType(AArch64_AM::UXTW); 814 Src = ZE->getOperand(0); 815 } 816 } else if (const auto *SE = dyn_cast<SExtInst>(U)) { 817 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 818 Addr.setExtendType(AArch64_AM::SXTW); 819 Src = SE->getOperand(0); 820 } 821 } 822 823 if (!Src) 824 break; 825 826 Addr.setShift(0); 827 unsigned Reg = getRegForValue(Src); 828 if (!Reg) 829 return false; 830 Addr.setOffsetReg(Reg); 831 return true; 832 } 833 } // end switch 834 835 if (Addr.isRegBase() && !Addr.getReg()) { 836 unsigned Reg = getRegForValue(Obj); 837 if (!Reg) 838 return false; 839 Addr.setReg(Reg); 840 return true; 841 } 842 843 if (!Addr.getOffsetReg()) { 844 unsigned Reg = getRegForValue(Obj); 845 if (!Reg) 846 return false; 847 Addr.setOffsetReg(Reg); 848 return true; 849 } 850 851 return false; 852 } 853 854 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 855 const User *U = nullptr; 856 unsigned Opcode = Instruction::UserOp1; 857 bool InMBB = true; 858 859 if (const auto *I = dyn_cast<Instruction>(V)) { 860 Opcode = I->getOpcode(); 861 U = I; 862 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 863 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 864 Opcode = C->getOpcode(); 865 U = C; 866 } 867 868 switch (Opcode) { 869 default: break; 870 case Instruction::BitCast: 871 // Look past bitcasts if its operand is in the same BB. 872 if (InMBB) 873 return computeCallAddress(U->getOperand(0), Addr); 874 break; 875 case Instruction::IntToPtr: 876 // Look past no-op inttoptrs if its operand is in the same BB. 877 if (InMBB && 878 TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 879 return computeCallAddress(U->getOperand(0), Addr); 880 break; 881 case Instruction::PtrToInt: 882 // Look past no-op ptrtoints if its operand is in the same BB. 883 if (InMBB && 884 TLI.getValueType(U->getType()) == TLI.getPointerTy()) 885 return computeCallAddress(U->getOperand(0), Addr); 886 break; 887 } 888 889 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 890 Addr.setGlobalValue(GV); 891 return true; 892 } 893 894 // If all else fails, try to materialize the value in a register. 895 if (!Addr.getGlobalValue()) { 896 Addr.setReg(getRegForValue(V)); 897 return Addr.getReg() != 0; 898 } 899 900 return false; 901 } 902 903 904 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { 905 EVT evt = TLI.getValueType(Ty, true); 906 907 // Only handle simple types. 908 if (evt == MVT::Other || !evt.isSimple()) 909 return false; 910 VT = evt.getSimpleVT(); 911 912 // This is a legal type, but it's not something we handle in fast-isel. 913 if (VT == MVT::f128) 914 return false; 915 916 // Handle all other legal types, i.e. a register that will directly hold this 917 // value. 918 return TLI.isTypeLegal(VT); 919 } 920 921 /// \brief Determine if the value type is supported by FastISel. 922 /// 923 /// FastISel for AArch64 can handle more value types than are legal. This adds 924 /// simple value type such as i1, i8, and i16. 925 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { 926 if (Ty->isVectorTy() && !IsVectorAllowed) 927 return false; 928 929 if (isTypeLegal(Ty, VT)) 930 return true; 931 932 // If this is a type than can be sign or zero-extended to a basic operation 933 // go ahead and accept it now. 934 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) 935 return true; 936 937 return false; 938 } 939 940 bool AArch64FastISel::isValueAvailable(const Value *V) const { 941 if (!isa<Instruction>(V)) 942 return true; 943 944 const auto *I = cast<Instruction>(V); 945 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) 946 return true; 947 948 return false; 949 } 950 951 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { 952 unsigned ScaleFactor = getImplicitScaleFactor(VT); 953 if (!ScaleFactor) 954 return false; 955 956 bool ImmediateOffsetNeedsLowering = false; 957 bool RegisterOffsetNeedsLowering = false; 958 int64_t Offset = Addr.getOffset(); 959 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) 960 ImmediateOffsetNeedsLowering = true; 961 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && 962 !isUInt<12>(Offset / ScaleFactor)) 963 ImmediateOffsetNeedsLowering = true; 964 965 // Cannot encode an offset register and an immediate offset in the same 966 // instruction. Fold the immediate offset into the load/store instruction and 967 // emit an additonal add to take care of the offset register. 968 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) 969 RegisterOffsetNeedsLowering = true; 970 971 // Cannot encode zero register as base. 972 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) 973 RegisterOffsetNeedsLowering = true; 974 975 // If this is a stack pointer and the offset needs to be simplified then put 976 // the alloca address into a register, set the base type back to register and 977 // continue. This should almost never happen. 978 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) 979 { 980 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 981 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 982 ResultReg) 983 .addFrameIndex(Addr.getFI()) 984 .addImm(0) 985 .addImm(0); 986 Addr.setKind(Address::RegBase); 987 Addr.setReg(ResultReg); 988 } 989 990 if (RegisterOffsetNeedsLowering) { 991 unsigned ResultReg = 0; 992 if (Addr.getReg()) { 993 if (Addr.getExtendType() == AArch64_AM::SXTW || 994 Addr.getExtendType() == AArch64_AM::UXTW ) 995 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 996 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 997 /*TODO:IsKill=*/false, Addr.getExtendType(), 998 Addr.getShift()); 999 else 1000 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1001 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 1002 /*TODO:IsKill=*/false, AArch64_AM::LSL, 1003 Addr.getShift()); 1004 } else { 1005 if (Addr.getExtendType() == AArch64_AM::UXTW) 1006 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1007 /*Op0IsKill=*/false, Addr.getShift(), 1008 /*IsZExt=*/true); 1009 else if (Addr.getExtendType() == AArch64_AM::SXTW) 1010 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1011 /*Op0IsKill=*/false, Addr.getShift(), 1012 /*IsZExt=*/false); 1013 else 1014 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 1015 /*Op0IsKill=*/false, Addr.getShift()); 1016 } 1017 if (!ResultReg) 1018 return false; 1019 1020 Addr.setReg(ResultReg); 1021 Addr.setOffsetReg(0); 1022 Addr.setShift(0); 1023 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 1024 } 1025 1026 // Since the offset is too large for the load/store instruction get the 1027 // reg+offset into a register. 1028 if (ImmediateOffsetNeedsLowering) { 1029 unsigned ResultReg; 1030 if (Addr.getReg()) 1031 // Try to fold the immediate into the add instruction. 1032 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); 1033 else 1034 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 1035 1036 if (!ResultReg) 1037 return false; 1038 Addr.setReg(ResultReg); 1039 Addr.setOffset(0); 1040 } 1041 return true; 1042 } 1043 1044 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 1045 const MachineInstrBuilder &MIB, 1046 unsigned Flags, 1047 unsigned ScaleFactor, 1048 MachineMemOperand *MMO) { 1049 int64_t Offset = Addr.getOffset() / ScaleFactor; 1050 // Frame base works a bit differently. Handle it separately. 1051 if (Addr.isFIBase()) { 1052 int FI = Addr.getFI(); 1053 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 1054 // and alignment should be based on the VT. 1055 MMO = FuncInfo.MF->getMachineMemOperand( 1056 MachinePointerInfo::getFixedStack(FI, Offset), Flags, 1057 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); 1058 // Now add the rest of the operands. 1059 MIB.addFrameIndex(FI).addImm(Offset); 1060 } else { 1061 assert(Addr.isRegBase() && "Unexpected address kind."); 1062 const MCInstrDesc &II = MIB->getDesc(); 1063 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; 1064 Addr.setReg( 1065 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 1066 Addr.setOffsetReg( 1067 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 1068 if (Addr.getOffsetReg()) { 1069 assert(Addr.getOffset() == 0 && "Unexpected offset"); 1070 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 1071 Addr.getExtendType() == AArch64_AM::SXTX; 1072 MIB.addReg(Addr.getReg()); 1073 MIB.addReg(Addr.getOffsetReg()); 1074 MIB.addImm(IsSigned); 1075 MIB.addImm(Addr.getShift() != 0); 1076 } else 1077 MIB.addReg(Addr.getReg()).addImm(Offset); 1078 } 1079 1080 if (MMO) 1081 MIB.addMemOperand(MMO); 1082 } 1083 1084 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 1085 const Value *RHS, bool SetFlags, 1086 bool WantResult, bool IsZExt) { 1087 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 1088 bool NeedExtend = false; 1089 switch (RetVT.SimpleTy) { 1090 default: 1091 return 0; 1092 case MVT::i1: 1093 NeedExtend = true; 1094 break; 1095 case MVT::i8: 1096 NeedExtend = true; 1097 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 1098 break; 1099 case MVT::i16: 1100 NeedExtend = true; 1101 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 1102 break; 1103 case MVT::i32: // fall-through 1104 case MVT::i64: 1105 break; 1106 } 1107 MVT SrcVT = RetVT; 1108 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 1109 1110 // Canonicalize immediates to the RHS first. 1111 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) 1112 std::swap(LHS, RHS); 1113 1114 // Canonicalize mul by power of 2 to the RHS. 1115 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1116 if (isMulPowOf2(LHS)) 1117 std::swap(LHS, RHS); 1118 1119 // Canonicalize shift immediate to the RHS. 1120 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1121 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 1122 if (isa<ConstantInt>(SI->getOperand(1))) 1123 if (SI->getOpcode() == Instruction::Shl || 1124 SI->getOpcode() == Instruction::LShr || 1125 SI->getOpcode() == Instruction::AShr ) 1126 std::swap(LHS, RHS); 1127 1128 unsigned LHSReg = getRegForValue(LHS); 1129 if (!LHSReg) 1130 return 0; 1131 bool LHSIsKill = hasTrivialKill(LHS); 1132 1133 if (NeedExtend) 1134 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 1135 1136 unsigned ResultReg = 0; 1137 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1138 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); 1139 if (C->isNegative()) 1140 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, 1141 SetFlags, WantResult); 1142 else 1143 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, 1144 WantResult); 1145 } else if (const auto *C = dyn_cast<Constant>(RHS)) 1146 if (C->isNullValue()) 1147 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, 1148 WantResult); 1149 1150 if (ResultReg) 1151 return ResultReg; 1152 1153 // Only extend the RHS within the instruction if there is a valid extend type. 1154 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 1155 isValueAvailable(RHS)) { 1156 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) 1157 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) 1158 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { 1159 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1160 if (!RHSReg) 1161 return 0; 1162 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1163 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1164 RHSIsKill, ExtendType, C->getZExtValue(), 1165 SetFlags, WantResult); 1166 } 1167 unsigned RHSReg = getRegForValue(RHS); 1168 if (!RHSReg) 1169 return 0; 1170 bool RHSIsKill = hasTrivialKill(RHS); 1171 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1172 ExtendType, 0, SetFlags, WantResult); 1173 } 1174 1175 // Check if the mul can be folded into the instruction. 1176 if (RHS->hasOneUse() && isValueAvailable(RHS)) 1177 if (isMulPowOf2(RHS)) { 1178 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1179 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1180 1181 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1182 if (C->getValue().isPowerOf2()) 1183 std::swap(MulLHS, MulRHS); 1184 1185 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1186 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1187 unsigned RHSReg = getRegForValue(MulLHS); 1188 if (!RHSReg) 1189 return 0; 1190 bool RHSIsKill = hasTrivialKill(MulLHS); 1191 return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1192 AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); 1193 } 1194 1195 // Check if the shift can be folded into the instruction. 1196 if (RHS->hasOneUse() && isValueAvailable(RHS)) 1197 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 1198 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1199 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 1200 switch (SI->getOpcode()) { 1201 default: break; 1202 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 1203 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 1204 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 1205 } 1206 uint64_t ShiftVal = C->getZExtValue(); 1207 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 1208 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1209 if (!RHSReg) 1210 return 0; 1211 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1212 return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1213 RHSIsKill, ShiftType, ShiftVal, SetFlags, 1214 WantResult); 1215 } 1216 } 1217 } 1218 1219 unsigned RHSReg = getRegForValue(RHS); 1220 if (!RHSReg) 1221 return 0; 1222 bool RHSIsKill = hasTrivialKill(RHS); 1223 1224 if (NeedExtend) 1225 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 1226 1227 return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1228 SetFlags, WantResult); 1229 } 1230 1231 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 1232 bool LHSIsKill, unsigned RHSReg, 1233 bool RHSIsKill, bool SetFlags, 1234 bool WantResult) { 1235 assert(LHSReg && RHSReg && "Invalid register number."); 1236 1237 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1238 return 0; 1239 1240 static const unsigned OpcTable[2][2][2] = { 1241 { { AArch64::SUBWrr, AArch64::SUBXrr }, 1242 { AArch64::ADDWrr, AArch64::ADDXrr } }, 1243 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 1244 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 1245 }; 1246 bool Is64Bit = RetVT == MVT::i64; 1247 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1248 const TargetRegisterClass *RC = 1249 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1250 unsigned ResultReg; 1251 if (WantResult) 1252 ResultReg = createResultReg(RC); 1253 else 1254 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1255 1256 const MCInstrDesc &II = TII.get(Opc); 1257 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1258 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1259 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1260 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1261 .addReg(RHSReg, getKillRegState(RHSIsKill)); 1262 return ResultReg; 1263 } 1264 1265 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 1266 bool LHSIsKill, uint64_t Imm, 1267 bool SetFlags, bool WantResult) { 1268 assert(LHSReg && "Invalid register number."); 1269 1270 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1271 return 0; 1272 1273 unsigned ShiftImm; 1274 if (isUInt<12>(Imm)) 1275 ShiftImm = 0; 1276 else if ((Imm & 0xfff000) == Imm) { 1277 ShiftImm = 12; 1278 Imm >>= 12; 1279 } else 1280 return 0; 1281 1282 static const unsigned OpcTable[2][2][2] = { 1283 { { AArch64::SUBWri, AArch64::SUBXri }, 1284 { AArch64::ADDWri, AArch64::ADDXri } }, 1285 { { AArch64::SUBSWri, AArch64::SUBSXri }, 1286 { AArch64::ADDSWri, AArch64::ADDSXri } } 1287 }; 1288 bool Is64Bit = RetVT == MVT::i64; 1289 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1290 const TargetRegisterClass *RC; 1291 if (SetFlags) 1292 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1293 else 1294 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1295 unsigned ResultReg; 1296 if (WantResult) 1297 ResultReg = createResultReg(RC); 1298 else 1299 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1300 1301 const MCInstrDesc &II = TII.get(Opc); 1302 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1303 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1304 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1305 .addImm(Imm) 1306 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 1307 return ResultReg; 1308 } 1309 1310 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 1311 bool LHSIsKill, unsigned RHSReg, 1312 bool RHSIsKill, 1313 AArch64_AM::ShiftExtendType ShiftType, 1314 uint64_t ShiftImm, bool SetFlags, 1315 bool WantResult) { 1316 assert(LHSReg && RHSReg && "Invalid register number."); 1317 1318 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1319 return 0; 1320 1321 static const unsigned OpcTable[2][2][2] = { 1322 { { AArch64::SUBWrs, AArch64::SUBXrs }, 1323 { AArch64::ADDWrs, AArch64::ADDXrs } }, 1324 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 1325 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 1326 }; 1327 bool Is64Bit = RetVT == MVT::i64; 1328 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1329 const TargetRegisterClass *RC = 1330 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1331 unsigned ResultReg; 1332 if (WantResult) 1333 ResultReg = createResultReg(RC); 1334 else 1335 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1336 1337 const MCInstrDesc &II = TII.get(Opc); 1338 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1339 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1340 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1341 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1342 .addReg(RHSReg, getKillRegState(RHSIsKill)) 1343 .addImm(getShifterImm(ShiftType, ShiftImm)); 1344 return ResultReg; 1345 } 1346 1347 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 1348 bool LHSIsKill, unsigned RHSReg, 1349 bool RHSIsKill, 1350 AArch64_AM::ShiftExtendType ExtType, 1351 uint64_t ShiftImm, bool SetFlags, 1352 bool WantResult) { 1353 assert(LHSReg && RHSReg && "Invalid register number."); 1354 1355 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1356 return 0; 1357 1358 static const unsigned OpcTable[2][2][2] = { 1359 { { AArch64::SUBWrx, AArch64::SUBXrx }, 1360 { AArch64::ADDWrx, AArch64::ADDXrx } }, 1361 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 1362 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 1363 }; 1364 bool Is64Bit = RetVT == MVT::i64; 1365 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1366 const TargetRegisterClass *RC = nullptr; 1367 if (SetFlags) 1368 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1369 else 1370 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1371 unsigned ResultReg; 1372 if (WantResult) 1373 ResultReg = createResultReg(RC); 1374 else 1375 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1376 1377 const MCInstrDesc &II = TII.get(Opc); 1378 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1379 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1381 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1382 .addReg(RHSReg, getKillRegState(RHSIsKill)) 1383 .addImm(getArithExtendImm(ExtType, ShiftImm)); 1384 return ResultReg; 1385 } 1386 1387 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 1388 Type *Ty = LHS->getType(); 1389 EVT EVT = TLI.getValueType(Ty, true); 1390 if (!EVT.isSimple()) 1391 return false; 1392 MVT VT = EVT.getSimpleVT(); 1393 1394 switch (VT.SimpleTy) { 1395 default: 1396 return false; 1397 case MVT::i1: 1398 case MVT::i8: 1399 case MVT::i16: 1400 case MVT::i32: 1401 case MVT::i64: 1402 return emitICmp(VT, LHS, RHS, IsZExt); 1403 case MVT::f32: 1404 case MVT::f64: 1405 return emitFCmp(VT, LHS, RHS); 1406 } 1407 } 1408 1409 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 1410 bool IsZExt) { 1411 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 1412 IsZExt) != 0; 1413 } 1414 1415 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1416 uint64_t Imm) { 1417 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, 1418 /*SetFlags=*/true, /*WantResult=*/false) != 0; 1419 } 1420 1421 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 1422 if (RetVT != MVT::f32 && RetVT != MVT::f64) 1423 return false; 1424 1425 // Check to see if the 2nd operand is a constant that we can encode directly 1426 // in the compare. 1427 bool UseImm = false; 1428 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 1429 if (CFP->isZero() && !CFP->isNegative()) 1430 UseImm = true; 1431 1432 unsigned LHSReg = getRegForValue(LHS); 1433 if (!LHSReg) 1434 return false; 1435 bool LHSIsKill = hasTrivialKill(LHS); 1436 1437 if (UseImm) { 1438 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; 1439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1440 .addReg(LHSReg, getKillRegState(LHSIsKill)); 1441 return true; 1442 } 1443 1444 unsigned RHSReg = getRegForValue(RHS); 1445 if (!RHSReg) 1446 return false; 1447 bool RHSIsKill = hasTrivialKill(RHS); 1448 1449 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; 1450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1451 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1452 .addReg(RHSReg, getKillRegState(RHSIsKill)); 1453 return true; 1454 } 1455 1456 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 1457 bool SetFlags, bool WantResult, bool IsZExt) { 1458 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 1459 IsZExt); 1460 } 1461 1462 /// \brief This method is a wrapper to simplify add emission. 1463 /// 1464 /// First try to emit an add with an immediate operand using emitAddSub_ri. If 1465 /// that fails, then try to materialize the immediate into a register and use 1466 /// emitAddSub_rr instead. 1467 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, 1468 int64_t Imm) { 1469 unsigned ResultReg; 1470 if (Imm < 0) 1471 ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); 1472 else 1473 ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); 1474 1475 if (ResultReg) 1476 return ResultReg; 1477 1478 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); 1479 if (!CReg) 1480 return 0; 1481 1482 ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); 1483 return ResultReg; 1484 } 1485 1486 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 1487 bool SetFlags, bool WantResult, bool IsZExt) { 1488 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 1489 IsZExt); 1490 } 1491 1492 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 1493 bool LHSIsKill, unsigned RHSReg, 1494 bool RHSIsKill, bool WantResult) { 1495 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 1496 RHSIsKill, /*SetFlags=*/true, WantResult); 1497 } 1498 1499 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 1500 bool LHSIsKill, unsigned RHSReg, 1501 bool RHSIsKill, 1502 AArch64_AM::ShiftExtendType ShiftType, 1503 uint64_t ShiftImm, bool WantResult) { 1504 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 1505 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, 1506 WantResult); 1507 } 1508 1509 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 1510 const Value *LHS, const Value *RHS) { 1511 // Canonicalize immediates to the RHS first. 1512 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 1513 std::swap(LHS, RHS); 1514 1515 // Canonicalize mul by power-of-2 to the RHS. 1516 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1517 if (isMulPowOf2(LHS)) 1518 std::swap(LHS, RHS); 1519 1520 // Canonicalize shift immediate to the RHS. 1521 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1522 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 1523 if (isa<ConstantInt>(SI->getOperand(1))) 1524 std::swap(LHS, RHS); 1525 1526 unsigned LHSReg = getRegForValue(LHS); 1527 if (!LHSReg) 1528 return 0; 1529 bool LHSIsKill = hasTrivialKill(LHS); 1530 1531 unsigned ResultReg = 0; 1532 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1533 uint64_t Imm = C->getZExtValue(); 1534 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); 1535 } 1536 if (ResultReg) 1537 return ResultReg; 1538 1539 // Check if the mul can be folded into the instruction. 1540 if (RHS->hasOneUse() && isValueAvailable(RHS)) 1541 if (isMulPowOf2(RHS)) { 1542 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1543 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1544 1545 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1546 if (C->getValue().isPowerOf2()) 1547 std::swap(MulLHS, MulRHS); 1548 1549 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1550 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1551 1552 unsigned RHSReg = getRegForValue(MulLHS); 1553 if (!RHSReg) 1554 return 0; 1555 bool RHSIsKill = hasTrivialKill(MulLHS); 1556 return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 1557 RHSIsKill, ShiftVal); 1558 } 1559 1560 // Check if the shift can be folded into the instruction. 1561 if (RHS->hasOneUse() && isValueAvailable(RHS)) 1562 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 1563 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1564 uint64_t ShiftVal = C->getZExtValue(); 1565 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1566 if (!RHSReg) 1567 return 0; 1568 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1569 return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 1570 RHSIsKill, ShiftVal); 1571 } 1572 1573 unsigned RHSReg = getRegForValue(RHS); 1574 if (!RHSReg) 1575 return 0; 1576 bool RHSIsKill = hasTrivialKill(RHS); 1577 1578 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 1579 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 1580 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1581 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1582 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1583 } 1584 return ResultReg; 1585 } 1586 1587 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1588 unsigned LHSReg, bool LHSIsKill, 1589 uint64_t Imm) { 1590 assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) && 1591 "ISD nodes are not consecutive!"); 1592 static const unsigned OpcTable[3][2] = { 1593 { AArch64::ANDWri, AArch64::ANDXri }, 1594 { AArch64::ORRWri, AArch64::ORRXri }, 1595 { AArch64::EORWri, AArch64::EORXri } 1596 }; 1597 const TargetRegisterClass *RC; 1598 unsigned Opc; 1599 unsigned RegSize; 1600 switch (RetVT.SimpleTy) { 1601 default: 1602 return 0; 1603 case MVT::i1: 1604 case MVT::i8: 1605 case MVT::i16: 1606 case MVT::i32: { 1607 unsigned Idx = ISDOpc - ISD::AND; 1608 Opc = OpcTable[Idx][0]; 1609 RC = &AArch64::GPR32spRegClass; 1610 RegSize = 32; 1611 break; 1612 } 1613 case MVT::i64: 1614 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1615 RC = &AArch64::GPR64spRegClass; 1616 RegSize = 64; 1617 break; 1618 } 1619 1620 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1621 return 0; 1622 1623 unsigned ResultReg = 1624 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, 1625 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1626 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1627 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1628 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1629 } 1630 return ResultReg; 1631 } 1632 1633 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1634 unsigned LHSReg, bool LHSIsKill, 1635 unsigned RHSReg, bool RHSIsKill, 1636 uint64_t ShiftImm) { 1637 assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) && 1638 "ISD nodes are not consecutive!"); 1639 static const unsigned OpcTable[3][2] = { 1640 { AArch64::ANDWrs, AArch64::ANDXrs }, 1641 { AArch64::ORRWrs, AArch64::ORRXrs }, 1642 { AArch64::EORWrs, AArch64::EORXrs } 1643 }; 1644 const TargetRegisterClass *RC; 1645 unsigned Opc; 1646 switch (RetVT.SimpleTy) { 1647 default: 1648 return 0; 1649 case MVT::i1: 1650 case MVT::i8: 1651 case MVT::i16: 1652 case MVT::i32: 1653 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1654 RC = &AArch64::GPR32RegClass; 1655 break; 1656 case MVT::i64: 1657 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1658 RC = &AArch64::GPR64RegClass; 1659 break; 1660 } 1661 unsigned ResultReg = 1662 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1663 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1664 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1665 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1666 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1667 } 1668 return ResultReg; 1669 } 1670 1671 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1672 uint64_t Imm) { 1673 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1674 } 1675 1676 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1677 bool WantZExt, MachineMemOperand *MMO) { 1678 // Simplify this down to something we can handle. 1679 if (!simplifyAddress(Addr, VT)) 1680 return 0; 1681 1682 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1683 if (!ScaleFactor) 1684 llvm_unreachable("Unexpected value type."); 1685 1686 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1687 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1688 bool UseScaled = true; 1689 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1690 UseScaled = false; 1691 ScaleFactor = 1; 1692 } 1693 1694 static const unsigned GPOpcTable[2][8][4] = { 1695 // Sign-extend. 1696 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1697 AArch64::LDURXi }, 1698 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1699 AArch64::LDURXi }, 1700 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1701 AArch64::LDRXui }, 1702 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1703 AArch64::LDRXui }, 1704 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1705 AArch64::LDRXroX }, 1706 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1707 AArch64::LDRXroX }, 1708 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1709 AArch64::LDRXroW }, 1710 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1711 AArch64::LDRXroW } 1712 }, 1713 // Zero-extend. 1714 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1715 AArch64::LDURXi }, 1716 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1717 AArch64::LDURXi }, 1718 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1719 AArch64::LDRXui }, 1720 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1721 AArch64::LDRXui }, 1722 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1723 AArch64::LDRXroX }, 1724 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1725 AArch64::LDRXroX }, 1726 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1727 AArch64::LDRXroW }, 1728 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1729 AArch64::LDRXroW } 1730 } 1731 }; 1732 1733 static const unsigned FPOpcTable[4][2] = { 1734 { AArch64::LDURSi, AArch64::LDURDi }, 1735 { AArch64::LDRSui, AArch64::LDRDui }, 1736 { AArch64::LDRSroX, AArch64::LDRDroX }, 1737 { AArch64::LDRSroW, AArch64::LDRDroW } 1738 }; 1739 1740 unsigned Opc; 1741 const TargetRegisterClass *RC; 1742 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1743 Addr.getOffsetReg(); 1744 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1745 if (Addr.getExtendType() == AArch64_AM::UXTW || 1746 Addr.getExtendType() == AArch64_AM::SXTW) 1747 Idx++; 1748 1749 bool IsRet64Bit = RetVT == MVT::i64; 1750 switch (VT.SimpleTy) { 1751 default: 1752 llvm_unreachable("Unexpected value type."); 1753 case MVT::i1: // Intentional fall-through. 1754 case MVT::i8: 1755 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1756 RC = (IsRet64Bit && !WantZExt) ? 1757 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1758 break; 1759 case MVT::i16: 1760 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1761 RC = (IsRet64Bit && !WantZExt) ? 1762 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1763 break; 1764 case MVT::i32: 1765 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1766 RC = (IsRet64Bit && !WantZExt) ? 1767 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1768 break; 1769 case MVT::i64: 1770 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1771 RC = &AArch64::GPR64RegClass; 1772 break; 1773 case MVT::f32: 1774 Opc = FPOpcTable[Idx][0]; 1775 RC = &AArch64::FPR32RegClass; 1776 break; 1777 case MVT::f64: 1778 Opc = FPOpcTable[Idx][1]; 1779 RC = &AArch64::FPR64RegClass; 1780 break; 1781 } 1782 1783 // Create the base instruction, then add the operands. 1784 unsigned ResultReg = createResultReg(RC); 1785 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1786 TII.get(Opc), ResultReg); 1787 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1788 1789 // Loading an i1 requires special handling. 1790 if (VT == MVT::i1) { 1791 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1792 assert(ANDReg && "Unexpected AND instruction emission failure."); 1793 ResultReg = ANDReg; 1794 } 1795 1796 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1797 // the 32bit reg to a 64bit reg. 1798 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1799 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1800 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1801 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1802 .addImm(0) 1803 .addReg(ResultReg, getKillRegState(true)) 1804 .addImm(AArch64::sub_32); 1805 ResultReg = Reg64; 1806 } 1807 return ResultReg; 1808 } 1809 1810 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1811 MVT VT; 1812 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1813 return false; 1814 1815 if (VT.isVector()) 1816 return selectOperator(I, I->getOpcode()); 1817 1818 unsigned ResultReg; 1819 switch (I->getOpcode()) { 1820 default: 1821 llvm_unreachable("Unexpected instruction."); 1822 case Instruction::Add: 1823 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1824 break; 1825 case Instruction::Sub: 1826 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1827 break; 1828 } 1829 if (!ResultReg) 1830 return false; 1831 1832 updateValueMap(I, ResultReg); 1833 return true; 1834 } 1835 1836 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1837 MVT VT; 1838 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1839 return false; 1840 1841 if (VT.isVector()) 1842 return selectOperator(I, I->getOpcode()); 1843 1844 unsigned ResultReg; 1845 switch (I->getOpcode()) { 1846 default: 1847 llvm_unreachable("Unexpected instruction."); 1848 case Instruction::And: 1849 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1850 break; 1851 case Instruction::Or: 1852 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1853 break; 1854 case Instruction::Xor: 1855 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1856 break; 1857 } 1858 if (!ResultReg) 1859 return false; 1860 1861 updateValueMap(I, ResultReg); 1862 return true; 1863 } 1864 1865 bool AArch64FastISel::selectLoad(const Instruction *I) { 1866 MVT VT; 1867 // Verify we have a legal type before going any further. Currently, we handle 1868 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1869 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1870 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1871 cast<LoadInst>(I)->isAtomic()) 1872 return false; 1873 1874 // See if we can handle this address. 1875 Address Addr; 1876 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1877 return false; 1878 1879 // Fold the following sign-/zero-extend into the load instruction. 1880 bool WantZExt = true; 1881 MVT RetVT = VT; 1882 const Value *IntExtVal = nullptr; 1883 if (I->hasOneUse()) { 1884 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1885 if (isTypeSupported(ZE->getType(), RetVT)) 1886 IntExtVal = ZE; 1887 else 1888 RetVT = VT; 1889 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1890 if (isTypeSupported(SE->getType(), RetVT)) 1891 IntExtVal = SE; 1892 else 1893 RetVT = VT; 1894 WantZExt = false; 1895 } 1896 } 1897 1898 unsigned ResultReg = 1899 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1900 if (!ResultReg) 1901 return false; 1902 1903 // There are a few different cases we have to handle, because the load or the 1904 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1905 // SelectionDAG. There is also an ordering issue when both instructions are in 1906 // different basic blocks. 1907 // 1.) The load instruction is selected by FastISel, but the integer extend 1908 // not. This usually happens when the integer extend is in a different 1909 // basic block and SelectionDAG took over for that basic block. 1910 // 2.) The load instruction is selected before the integer extend. This only 1911 // happens when the integer extend is in a different basic block. 1912 // 3.) The load instruction is selected by SelectionDAG and the integer extend 1913 // by FastISel. This happens if there are instructions between the load 1914 // and the integer extend that couldn't be selected by FastISel. 1915 if (IntExtVal) { 1916 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 1917 // could select it. Emit a copy to subreg if necessary. FastISel will remove 1918 // it when it selects the integer extend. 1919 unsigned Reg = lookUpRegForValue(IntExtVal); 1920 auto *MI = MRI.getUniqueVRegDef(Reg); 1921 if (!MI) { 1922 if (RetVT == MVT::i64 && VT <= MVT::i32) { 1923 if (WantZExt) { 1924 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 1925 std::prev(FuncInfo.InsertPt)->eraseFromParent(); 1926 ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg(); 1927 } else 1928 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 1929 /*IsKill=*/true, 1930 AArch64::sub_32); 1931 } 1932 updateValueMap(I, ResultReg); 1933 return true; 1934 } 1935 1936 // The integer extend has already been emitted - delete all the instructions 1937 // that have been emitted by the integer extend lowering code and use the 1938 // result from the load instruction directly. 1939 while (MI) { 1940 Reg = 0; 1941 for (auto &Opnd : MI->uses()) { 1942 if (Opnd.isReg()) { 1943 Reg = Opnd.getReg(); 1944 break; 1945 } 1946 } 1947 MI->eraseFromParent(); 1948 MI = nullptr; 1949 if (Reg) 1950 MI = MRI.getUniqueVRegDef(Reg); 1951 } 1952 updateValueMap(IntExtVal, ResultReg); 1953 return true; 1954 } 1955 1956 updateValueMap(I, ResultReg); 1957 return true; 1958 } 1959 1960 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 1961 MachineMemOperand *MMO) { 1962 // Simplify this down to something we can handle. 1963 if (!simplifyAddress(Addr, VT)) 1964 return false; 1965 1966 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1967 if (!ScaleFactor) 1968 llvm_unreachable("Unexpected value type."); 1969 1970 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1971 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1972 bool UseScaled = true; 1973 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1974 UseScaled = false; 1975 ScaleFactor = 1; 1976 } 1977 1978 static const unsigned OpcTable[4][6] = { 1979 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 1980 AArch64::STURSi, AArch64::STURDi }, 1981 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 1982 AArch64::STRSui, AArch64::STRDui }, 1983 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 1984 AArch64::STRSroX, AArch64::STRDroX }, 1985 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 1986 AArch64::STRSroW, AArch64::STRDroW } 1987 }; 1988 1989 unsigned Opc; 1990 bool VTIsi1 = false; 1991 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1992 Addr.getOffsetReg(); 1993 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1994 if (Addr.getExtendType() == AArch64_AM::UXTW || 1995 Addr.getExtendType() == AArch64_AM::SXTW) 1996 Idx++; 1997 1998 switch (VT.SimpleTy) { 1999 default: llvm_unreachable("Unexpected value type."); 2000 case MVT::i1: VTIsi1 = true; 2001 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2002 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2003 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2004 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2005 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2006 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2007 } 2008 2009 // Storing an i1 requires special handling. 2010 if (VTIsi1 && SrcReg != AArch64::WZR) { 2011 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 2012 assert(ANDReg && "Unexpected AND instruction emission failure."); 2013 SrcReg = ANDReg; 2014 } 2015 // Create the base instruction, then add the operands. 2016 const MCInstrDesc &II = TII.get(Opc); 2017 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2018 MachineInstrBuilder MIB = 2019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2020 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2021 2022 return true; 2023 } 2024 2025 bool AArch64FastISel::selectStore(const Instruction *I) { 2026 MVT VT; 2027 const Value *Op0 = I->getOperand(0); 2028 // Verify we have a legal type before going any further. Currently, we handle 2029 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2030 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2031 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) || 2032 cast<StoreInst>(I)->isAtomic()) 2033 return false; 2034 2035 // Get the value to be stored into a register. Use the zero register directly 2036 // when possible to avoid an unnecessary copy and a wasted register. 2037 unsigned SrcReg = 0; 2038 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2039 if (CI->isZero()) 2040 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2041 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2042 if (CF->isZero() && !CF->isNegative()) { 2043 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2044 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2045 } 2046 } 2047 2048 if (!SrcReg) 2049 SrcReg = getRegForValue(Op0); 2050 2051 if (!SrcReg) 2052 return false; 2053 2054 // See if we can handle this address. 2055 Address Addr; 2056 if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType())) 2057 return false; 2058 2059 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2060 return false; 2061 return true; 2062 } 2063 2064 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2065 switch (Pred) { 2066 case CmpInst::FCMP_ONE: 2067 case CmpInst::FCMP_UEQ: 2068 default: 2069 // AL is our "false" for now. The other two need more compares. 2070 return AArch64CC::AL; 2071 case CmpInst::ICMP_EQ: 2072 case CmpInst::FCMP_OEQ: 2073 return AArch64CC::EQ; 2074 case CmpInst::ICMP_SGT: 2075 case CmpInst::FCMP_OGT: 2076 return AArch64CC::GT; 2077 case CmpInst::ICMP_SGE: 2078 case CmpInst::FCMP_OGE: 2079 return AArch64CC::GE; 2080 case CmpInst::ICMP_UGT: 2081 case CmpInst::FCMP_UGT: 2082 return AArch64CC::HI; 2083 case CmpInst::FCMP_OLT: 2084 return AArch64CC::MI; 2085 case CmpInst::ICMP_ULE: 2086 case CmpInst::FCMP_OLE: 2087 return AArch64CC::LS; 2088 case CmpInst::FCMP_ORD: 2089 return AArch64CC::VC; 2090 case CmpInst::FCMP_UNO: 2091 return AArch64CC::VS; 2092 case CmpInst::FCMP_UGE: 2093 return AArch64CC::PL; 2094 case CmpInst::ICMP_SLT: 2095 case CmpInst::FCMP_ULT: 2096 return AArch64CC::LT; 2097 case CmpInst::ICMP_SLE: 2098 case CmpInst::FCMP_ULE: 2099 return AArch64CC::LE; 2100 case CmpInst::FCMP_UNE: 2101 case CmpInst::ICMP_NE: 2102 return AArch64CC::NE; 2103 case CmpInst::ICMP_UGE: 2104 return AArch64CC::HS; 2105 case CmpInst::ICMP_ULT: 2106 return AArch64CC::LO; 2107 } 2108 } 2109 2110 /// \brief Try to emit a combined compare-and-branch instruction. 2111 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2112 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2113 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2114 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2115 2116 const Value *LHS = CI->getOperand(0); 2117 const Value *RHS = CI->getOperand(1); 2118 2119 MVT VT; 2120 if (!isTypeSupported(LHS->getType(), VT)) 2121 return false; 2122 2123 unsigned BW = VT.getSizeInBits(); 2124 if (BW > 64) 2125 return false; 2126 2127 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2128 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2129 2130 // Try to take advantage of fallthrough opportunities. 2131 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2132 std::swap(TBB, FBB); 2133 Predicate = CmpInst::getInversePredicate(Predicate); 2134 } 2135 2136 int TestBit = -1; 2137 bool IsCmpNE; 2138 switch (Predicate) { 2139 default: 2140 return false; 2141 case CmpInst::ICMP_EQ: 2142 case CmpInst::ICMP_NE: 2143 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2144 std::swap(LHS, RHS); 2145 2146 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2147 return false; 2148 2149 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2150 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2151 const Value *AndLHS = AI->getOperand(0); 2152 const Value *AndRHS = AI->getOperand(1); 2153 2154 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2155 if (C->getValue().isPowerOf2()) 2156 std::swap(AndLHS, AndRHS); 2157 2158 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2159 if (C->getValue().isPowerOf2()) { 2160 TestBit = C->getValue().logBase2(); 2161 LHS = AndLHS; 2162 } 2163 } 2164 2165 if (VT == MVT::i1) 2166 TestBit = 0; 2167 2168 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2169 break; 2170 case CmpInst::ICMP_SLT: 2171 case CmpInst::ICMP_SGE: 2172 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2173 return false; 2174 2175 TestBit = BW - 1; 2176 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2177 break; 2178 case CmpInst::ICMP_SGT: 2179 case CmpInst::ICMP_SLE: 2180 if (!isa<ConstantInt>(RHS)) 2181 return false; 2182 2183 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2184 return false; 2185 2186 TestBit = BW - 1; 2187 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2188 break; 2189 } // end switch 2190 2191 static const unsigned OpcTable[2][2][2] = { 2192 { {AArch64::CBZW, AArch64::CBZX }, 2193 {AArch64::CBNZW, AArch64::CBNZX} }, 2194 { {AArch64::TBZW, AArch64::TBZX }, 2195 {AArch64::TBNZW, AArch64::TBNZX} } 2196 }; 2197 2198 bool IsBitTest = TestBit != -1; 2199 bool Is64Bit = BW == 64; 2200 if (TestBit < 32 && TestBit >= 0) 2201 Is64Bit = false; 2202 2203 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2204 const MCInstrDesc &II = TII.get(Opc); 2205 2206 unsigned SrcReg = getRegForValue(LHS); 2207 if (!SrcReg) 2208 return false; 2209 bool SrcIsKill = hasTrivialKill(LHS); 2210 2211 if (BW == 64 && !Is64Bit) 2212 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2213 AArch64::sub_32); 2214 2215 if ((BW < 32) && !IsBitTest) 2216 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); 2217 2218 // Emit the combined compare and branch instruction. 2219 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2220 MachineInstrBuilder MIB = 2221 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2222 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2223 if (IsBitTest) 2224 MIB.addImm(TestBit); 2225 MIB.addMBB(TBB); 2226 2227 // Obtain the branch weight and add the TrueBB to the successor list. 2228 uint32_t BranchWeight = 0; 2229 if (FuncInfo.BPI) 2230 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 2231 TBB->getBasicBlock()); 2232 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 2233 fastEmitBranch(FBB, DbgLoc); 2234 2235 return true; 2236 } 2237 2238 bool AArch64FastISel::selectBranch(const Instruction *I) { 2239 const BranchInst *BI = cast<BranchInst>(I); 2240 if (BI->isUnconditional()) { 2241 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2242 fastEmitBranch(MSucc, BI->getDebugLoc()); 2243 return true; 2244 } 2245 2246 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2247 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2248 2249 AArch64CC::CondCode CC = AArch64CC::NE; 2250 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2251 if (CI->hasOneUse() && isValueAvailable(CI)) { 2252 // Try to optimize or fold the cmp. 2253 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2254 switch (Predicate) { 2255 default: 2256 break; 2257 case CmpInst::FCMP_FALSE: 2258 fastEmitBranch(FBB, DbgLoc); 2259 return true; 2260 case CmpInst::FCMP_TRUE: 2261 fastEmitBranch(TBB, DbgLoc); 2262 return true; 2263 } 2264 2265 // Try to emit a combined compare-and-branch first. 2266 if (emitCompareAndBranch(BI)) 2267 return true; 2268 2269 // Try to take advantage of fallthrough opportunities. 2270 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2271 std::swap(TBB, FBB); 2272 Predicate = CmpInst::getInversePredicate(Predicate); 2273 } 2274 2275 // Emit the cmp. 2276 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2277 return false; 2278 2279 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2280 // instruction. 2281 CC = getCompareCC(Predicate); 2282 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2283 switch (Predicate) { 2284 default: 2285 break; 2286 case CmpInst::FCMP_UEQ: 2287 ExtraCC = AArch64CC::EQ; 2288 CC = AArch64CC::VS; 2289 break; 2290 case CmpInst::FCMP_ONE: 2291 ExtraCC = AArch64CC::MI; 2292 CC = AArch64CC::GT; 2293 break; 2294 } 2295 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2296 2297 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2298 if (ExtraCC != AArch64CC::AL) { 2299 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2300 .addImm(ExtraCC) 2301 .addMBB(TBB); 2302 } 2303 2304 // Emit the branch. 2305 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2306 .addImm(CC) 2307 .addMBB(TBB); 2308 2309 // Obtain the branch weight and add the TrueBB to the successor list. 2310 uint32_t BranchWeight = 0; 2311 if (FuncInfo.BPI) 2312 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 2313 TBB->getBasicBlock()); 2314 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 2315 2316 fastEmitBranch(FBB, DbgLoc); 2317 return true; 2318 } 2319 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { 2320 MVT SrcVT; 2321 if (TI->hasOneUse() && isValueAvailable(TI) && 2322 isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { 2323 unsigned CondReg = getRegForValue(TI->getOperand(0)); 2324 if (!CondReg) 2325 return false; 2326 bool CondIsKill = hasTrivialKill(TI->getOperand(0)); 2327 2328 // Issue an extract_subreg to get the lower 32-bits. 2329 if (SrcVT == MVT::i64) { 2330 CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, 2331 AArch64::sub_32); 2332 CondIsKill = true; 2333 } 2334 2335 unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); 2336 assert(ANDReg && "Unexpected AND instruction emission failure."); 2337 emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); 2338 2339 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2340 std::swap(TBB, FBB); 2341 CC = AArch64CC::EQ; 2342 } 2343 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2344 .addImm(CC) 2345 .addMBB(TBB); 2346 2347 // Obtain the branch weight and add the TrueBB to the successor list. 2348 uint32_t BranchWeight = 0; 2349 if (FuncInfo.BPI) 2350 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 2351 TBB->getBasicBlock()); 2352 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 2353 2354 fastEmitBranch(FBB, DbgLoc); 2355 return true; 2356 } 2357 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2358 uint64_t Imm = CI->getZExtValue(); 2359 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2360 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2361 .addMBB(Target); 2362 2363 // Obtain the branch weight and add the target to the successor list. 2364 uint32_t BranchWeight = 0; 2365 if (FuncInfo.BPI) 2366 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 2367 Target->getBasicBlock()); 2368 FuncInfo.MBB->addSuccessor(Target, BranchWeight); 2369 return true; 2370 } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2371 // Fake request the condition, otherwise the intrinsic might be completely 2372 // optimized away. 2373 unsigned CondReg = getRegForValue(BI->getCondition()); 2374 if (!CondReg) 2375 return false; 2376 2377 // Emit the branch. 2378 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2379 .addImm(CC) 2380 .addMBB(TBB); 2381 2382 // Obtain the branch weight and add the TrueBB to the successor list. 2383 uint32_t BranchWeight = 0; 2384 if (FuncInfo.BPI) 2385 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 2386 TBB->getBasicBlock()); 2387 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 2388 2389 fastEmitBranch(FBB, DbgLoc); 2390 return true; 2391 } 2392 2393 unsigned CondReg = getRegForValue(BI->getCondition()); 2394 if (CondReg == 0) 2395 return false; 2396 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2397 2398 // We've been divorced from our compare! Our block was split, and 2399 // now our compare lives in a predecessor block. We musn't 2400 // re-compare here, as the children of the compare aren't guaranteed 2401 // live across the block boundary (we *could* check for this). 2402 // Regardless, the compare has been done in the predecessor block, 2403 // and it left a value for us in a virtual register. Ergo, we test 2404 // the one-bit value left in the virtual register. 2405 emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); 2406 2407 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2408 std::swap(TBB, FBB); 2409 CC = AArch64CC::EQ; 2410 } 2411 2412 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2413 .addImm(CC) 2414 .addMBB(TBB); 2415 2416 // Obtain the branch weight and add the TrueBB to the successor list. 2417 uint32_t BranchWeight = 0; 2418 if (FuncInfo.BPI) 2419 BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), 2420 TBB->getBasicBlock()); 2421 FuncInfo.MBB->addSuccessor(TBB, BranchWeight); 2422 2423 fastEmitBranch(FBB, DbgLoc); 2424 return true; 2425 } 2426 2427 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2428 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2429 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2430 if (AddrReg == 0) 2431 return false; 2432 2433 // Emit the indirect branch. 2434 const MCInstrDesc &II = TII.get(AArch64::BR); 2435 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2436 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2437 2438 // Make sure the CFG is up-to-date. 2439 for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) 2440 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); 2441 2442 return true; 2443 } 2444 2445 bool AArch64FastISel::selectCmp(const Instruction *I) { 2446 const CmpInst *CI = cast<CmpInst>(I); 2447 2448 // Try to optimize or fold the cmp. 2449 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2450 unsigned ResultReg = 0; 2451 switch (Predicate) { 2452 default: 2453 break; 2454 case CmpInst::FCMP_FALSE: 2455 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2456 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2457 TII.get(TargetOpcode::COPY), ResultReg) 2458 .addReg(AArch64::WZR, getKillRegState(true)); 2459 break; 2460 case CmpInst::FCMP_TRUE: 2461 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2462 break; 2463 } 2464 2465 if (ResultReg) { 2466 updateValueMap(I, ResultReg); 2467 return true; 2468 } 2469 2470 // Emit the cmp. 2471 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2472 return false; 2473 2474 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2475 2476 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2477 // condition codes are inverted, because they are used by CSINC. 2478 static unsigned CondCodeTable[2][2] = { 2479 { AArch64CC::NE, AArch64CC::VC }, 2480 { AArch64CC::PL, AArch64CC::LE } 2481 }; 2482 unsigned *CondCodes = nullptr; 2483 switch (Predicate) { 2484 default: 2485 break; 2486 case CmpInst::FCMP_UEQ: 2487 CondCodes = &CondCodeTable[0][0]; 2488 break; 2489 case CmpInst::FCMP_ONE: 2490 CondCodes = &CondCodeTable[1][0]; 2491 break; 2492 } 2493 2494 if (CondCodes) { 2495 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2497 TmpReg1) 2498 .addReg(AArch64::WZR, getKillRegState(true)) 2499 .addReg(AArch64::WZR, getKillRegState(true)) 2500 .addImm(CondCodes[0]); 2501 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2502 ResultReg) 2503 .addReg(TmpReg1, getKillRegState(true)) 2504 .addReg(AArch64::WZR, getKillRegState(true)) 2505 .addImm(CondCodes[1]); 2506 2507 updateValueMap(I, ResultReg); 2508 return true; 2509 } 2510 2511 // Now set a register based on the comparison. 2512 AArch64CC::CondCode CC = getCompareCC(Predicate); 2513 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2514 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2515 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2516 ResultReg) 2517 .addReg(AArch64::WZR, getKillRegState(true)) 2518 .addReg(AArch64::WZR, getKillRegState(true)) 2519 .addImm(invertedCC); 2520 2521 updateValueMap(I, ResultReg); 2522 return true; 2523 } 2524 2525 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false' 2526 /// value. 2527 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2528 if (!SI->getType()->isIntegerTy(1)) 2529 return false; 2530 2531 const Value *Src1Val, *Src2Val; 2532 unsigned Opc = 0; 2533 bool NeedExtraOp = false; 2534 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2535 if (CI->isOne()) { 2536 Src1Val = SI->getCondition(); 2537 Src2Val = SI->getFalseValue(); 2538 Opc = AArch64::ORRWrr; 2539 } else { 2540 assert(CI->isZero()); 2541 Src1Val = SI->getFalseValue(); 2542 Src2Val = SI->getCondition(); 2543 Opc = AArch64::BICWrr; 2544 } 2545 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2546 if (CI->isOne()) { 2547 Src1Val = SI->getCondition(); 2548 Src2Val = SI->getTrueValue(); 2549 Opc = AArch64::ORRWrr; 2550 NeedExtraOp = true; 2551 } else { 2552 assert(CI->isZero()); 2553 Src1Val = SI->getCondition(); 2554 Src2Val = SI->getTrueValue(); 2555 Opc = AArch64::ANDWrr; 2556 } 2557 } 2558 2559 if (!Opc) 2560 return false; 2561 2562 unsigned Src1Reg = getRegForValue(Src1Val); 2563 if (!Src1Reg) 2564 return false; 2565 bool Src1IsKill = hasTrivialKill(Src1Val); 2566 2567 unsigned Src2Reg = getRegForValue(Src2Val); 2568 if (!Src2Reg) 2569 return false; 2570 bool Src2IsKill = hasTrivialKill(Src2Val); 2571 2572 if (NeedExtraOp) { 2573 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); 2574 Src1IsKill = true; 2575 } 2576 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2577 Src1IsKill, Src2Reg, Src2IsKill); 2578 updateValueMap(SI, ResultReg); 2579 return true; 2580 } 2581 2582 bool AArch64FastISel::selectSelect(const Instruction *I) { 2583 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2584 MVT VT; 2585 if (!isTypeSupported(I->getType(), VT)) 2586 return false; 2587 2588 unsigned Opc; 2589 const TargetRegisterClass *RC; 2590 switch (VT.SimpleTy) { 2591 default: 2592 return false; 2593 case MVT::i1: 2594 case MVT::i8: 2595 case MVT::i16: 2596 case MVT::i32: 2597 Opc = AArch64::CSELWr; 2598 RC = &AArch64::GPR32RegClass; 2599 break; 2600 case MVT::i64: 2601 Opc = AArch64::CSELXr; 2602 RC = &AArch64::GPR64RegClass; 2603 break; 2604 case MVT::f32: 2605 Opc = AArch64::FCSELSrrr; 2606 RC = &AArch64::FPR32RegClass; 2607 break; 2608 case MVT::f64: 2609 Opc = AArch64::FCSELDrrr; 2610 RC = &AArch64::FPR64RegClass; 2611 break; 2612 } 2613 2614 const SelectInst *SI = cast<SelectInst>(I); 2615 const Value *Cond = SI->getCondition(); 2616 AArch64CC::CondCode CC = AArch64CC::NE; 2617 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2618 2619 if (optimizeSelect(SI)) 2620 return true; 2621 2622 // Try to pickup the flags, so we don't have to emit another compare. 2623 if (foldXALUIntrinsic(CC, I, Cond)) { 2624 // Fake request the condition to force emission of the XALU intrinsic. 2625 unsigned CondReg = getRegForValue(Cond); 2626 if (!CondReg) 2627 return false; 2628 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2629 isValueAvailable(Cond)) { 2630 const auto *Cmp = cast<CmpInst>(Cond); 2631 // Try to optimize or fold the cmp. 2632 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2633 const Value *FoldSelect = nullptr; 2634 switch (Predicate) { 2635 default: 2636 break; 2637 case CmpInst::FCMP_FALSE: 2638 FoldSelect = SI->getFalseValue(); 2639 break; 2640 case CmpInst::FCMP_TRUE: 2641 FoldSelect = SI->getTrueValue(); 2642 break; 2643 } 2644 2645 if (FoldSelect) { 2646 unsigned SrcReg = getRegForValue(FoldSelect); 2647 if (!SrcReg) 2648 return false; 2649 unsigned UseReg = lookUpRegForValue(SI); 2650 if (UseReg) 2651 MRI.clearKillFlags(UseReg); 2652 2653 updateValueMap(I, SrcReg); 2654 return true; 2655 } 2656 2657 // Emit the cmp. 2658 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2659 return false; 2660 2661 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2662 CC = getCompareCC(Predicate); 2663 switch (Predicate) { 2664 default: 2665 break; 2666 case CmpInst::FCMP_UEQ: 2667 ExtraCC = AArch64CC::EQ; 2668 CC = AArch64CC::VS; 2669 break; 2670 case CmpInst::FCMP_ONE: 2671 ExtraCC = AArch64CC::MI; 2672 CC = AArch64CC::GT; 2673 break; 2674 } 2675 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2676 } else { 2677 unsigned CondReg = getRegForValue(Cond); 2678 if (!CondReg) 2679 return false; 2680 bool CondIsKill = hasTrivialKill(Cond); 2681 2682 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2683 CondReg = constrainOperandRegClass(II, CondReg, 1); 2684 2685 // Emit a TST instruction (ANDS wzr, reg, #imm). 2686 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2687 AArch64::WZR) 2688 .addReg(CondReg, getKillRegState(CondIsKill)) 2689 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2690 } 2691 2692 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2693 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2694 2695 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2696 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2697 2698 if (!Src1Reg || !Src2Reg) 2699 return false; 2700 2701 if (ExtraCC != AArch64CC::AL) { 2702 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2703 Src2IsKill, ExtraCC); 2704 Src2IsKill = true; 2705 } 2706 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2707 Src2IsKill, CC); 2708 updateValueMap(I, ResultReg); 2709 return true; 2710 } 2711 2712 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2713 Value *V = I->getOperand(0); 2714 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2715 return false; 2716 2717 unsigned Op = getRegForValue(V); 2718 if (Op == 0) 2719 return false; 2720 2721 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2722 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2723 ResultReg).addReg(Op); 2724 updateValueMap(I, ResultReg); 2725 return true; 2726 } 2727 2728 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2729 Value *V = I->getOperand(0); 2730 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2731 return false; 2732 2733 unsigned Op = getRegForValue(V); 2734 if (Op == 0) 2735 return false; 2736 2737 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2738 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2739 ResultReg).addReg(Op); 2740 updateValueMap(I, ResultReg); 2741 return true; 2742 } 2743 2744 // FPToUI and FPToSI 2745 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2746 MVT DestVT; 2747 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2748 return false; 2749 2750 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2751 if (SrcReg == 0) 2752 return false; 2753 2754 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); 2755 if (SrcVT == MVT::f128) 2756 return false; 2757 2758 unsigned Opc; 2759 if (SrcVT == MVT::f64) { 2760 if (Signed) 2761 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2762 else 2763 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2764 } else { 2765 if (Signed) 2766 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2767 else 2768 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2769 } 2770 unsigned ResultReg = createResultReg( 2771 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2773 .addReg(SrcReg); 2774 updateValueMap(I, ResultReg); 2775 return true; 2776 } 2777 2778 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2779 MVT DestVT; 2780 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2781 return false; 2782 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) && 2783 "Unexpected value type."); 2784 2785 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2786 if (!SrcReg) 2787 return false; 2788 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2789 2790 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); 2791 2792 // Handle sign-extension. 2793 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2794 SrcReg = 2795 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2796 if (!SrcReg) 2797 return false; 2798 SrcIsKill = true; 2799 } 2800 2801 unsigned Opc; 2802 if (SrcVT == MVT::i64) { 2803 if (Signed) 2804 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2805 else 2806 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2807 } else { 2808 if (Signed) 2809 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2810 else 2811 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2812 } 2813 2814 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2815 SrcIsKill); 2816 updateValueMap(I, ResultReg); 2817 return true; 2818 } 2819 2820 bool AArch64FastISel::fastLowerArguments() { 2821 if (!FuncInfo.CanLowerReturn) 2822 return false; 2823 2824 const Function *F = FuncInfo.Fn; 2825 if (F->isVarArg()) 2826 return false; 2827 2828 CallingConv::ID CC = F->getCallingConv(); 2829 if (CC != CallingConv::C) 2830 return false; 2831 2832 // Only handle simple cases of up to 8 GPR and FPR each. 2833 unsigned GPRCnt = 0; 2834 unsigned FPRCnt = 0; 2835 unsigned Idx = 0; 2836 for (auto const &Arg : F->args()) { 2837 // The first argument is at index 1. 2838 ++Idx; 2839 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 2840 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2841 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2842 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 2843 return false; 2844 2845 Type *ArgTy = Arg.getType(); 2846 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2847 return false; 2848 2849 EVT ArgVT = TLI.getValueType(ArgTy); 2850 if (!ArgVT.isSimple()) 2851 return false; 2852 2853 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2854 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2855 return false; 2856 2857 if (VT.isVector() && 2858 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2859 return false; 2860 2861 if (VT >= MVT::i1 && VT <= MVT::i64) 2862 ++GPRCnt; 2863 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2864 VT.is128BitVector()) 2865 ++FPRCnt; 2866 else 2867 return false; 2868 2869 if (GPRCnt > 8 || FPRCnt > 8) 2870 return false; 2871 } 2872 2873 static const MCPhysReg Registers[6][8] = { 2874 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2875 AArch64::W5, AArch64::W6, AArch64::W7 }, 2876 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2877 AArch64::X5, AArch64::X6, AArch64::X7 }, 2878 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2879 AArch64::H5, AArch64::H6, AArch64::H7 }, 2880 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2881 AArch64::S5, AArch64::S6, AArch64::S7 }, 2882 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2883 AArch64::D5, AArch64::D6, AArch64::D7 }, 2884 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2885 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2886 }; 2887 2888 unsigned GPRIdx = 0; 2889 unsigned FPRIdx = 0; 2890 for (auto const &Arg : F->args()) { 2891 MVT VT = TLI.getSimpleValueType(Arg.getType()); 2892 unsigned SrcReg; 2893 const TargetRegisterClass *RC; 2894 if (VT >= MVT::i1 && VT <= MVT::i32) { 2895 SrcReg = Registers[0][GPRIdx++]; 2896 RC = &AArch64::GPR32RegClass; 2897 VT = MVT::i32; 2898 } else if (VT == MVT::i64) { 2899 SrcReg = Registers[1][GPRIdx++]; 2900 RC = &AArch64::GPR64RegClass; 2901 } else if (VT == MVT::f16) { 2902 SrcReg = Registers[2][FPRIdx++]; 2903 RC = &AArch64::FPR16RegClass; 2904 } else if (VT == MVT::f32) { 2905 SrcReg = Registers[3][FPRIdx++]; 2906 RC = &AArch64::FPR32RegClass; 2907 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2908 SrcReg = Registers[4][FPRIdx++]; 2909 RC = &AArch64::FPR64RegClass; 2910 } else if (VT.is128BitVector()) { 2911 SrcReg = Registers[5][FPRIdx++]; 2912 RC = &AArch64::FPR128RegClass; 2913 } else 2914 llvm_unreachable("Unexpected value type."); 2915 2916 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2917 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2918 // Without this, EmitLiveInCopies may eliminate the livein if its only 2919 // use is a bitcast (which isn't turned into an instruction). 2920 unsigned ResultReg = createResultReg(RC); 2921 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2922 TII.get(TargetOpcode::COPY), ResultReg) 2923 .addReg(DstReg, getKillRegState(true)); 2924 updateValueMap(&Arg, ResultReg); 2925 } 2926 return true; 2927 } 2928 2929 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 2930 SmallVectorImpl<MVT> &OutVTs, 2931 unsigned &NumBytes) { 2932 CallingConv::ID CC = CLI.CallConv; 2933 SmallVector<CCValAssign, 16> ArgLocs; 2934 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 2935 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 2936 2937 // Get a count of how many bytes are to be pushed on the stack. 2938 NumBytes = CCInfo.getNextStackOffset(); 2939 2940 // Issue CALLSEQ_START 2941 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 2942 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 2943 .addImm(NumBytes); 2944 2945 // Process the args. 2946 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2947 CCValAssign &VA = ArgLocs[i]; 2948 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 2949 MVT ArgVT = OutVTs[VA.getValNo()]; 2950 2951 unsigned ArgReg = getRegForValue(ArgVal); 2952 if (!ArgReg) 2953 return false; 2954 2955 // Handle arg promotion: SExt, ZExt, AExt. 2956 switch (VA.getLocInfo()) { 2957 case CCValAssign::Full: 2958 break; 2959 case CCValAssign::SExt: { 2960 MVT DestVT = VA.getLocVT(); 2961 MVT SrcVT = ArgVT; 2962 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 2963 if (!ArgReg) 2964 return false; 2965 break; 2966 } 2967 case CCValAssign::AExt: 2968 // Intentional fall-through. 2969 case CCValAssign::ZExt: { 2970 MVT DestVT = VA.getLocVT(); 2971 MVT SrcVT = ArgVT; 2972 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 2973 if (!ArgReg) 2974 return false; 2975 break; 2976 } 2977 default: 2978 llvm_unreachable("Unknown arg promotion!"); 2979 } 2980 2981 // Now copy/store arg to correct locations. 2982 if (VA.isRegLoc() && !VA.needsCustom()) { 2983 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2984 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 2985 CLI.OutRegs.push_back(VA.getLocReg()); 2986 } else if (VA.needsCustom()) { 2987 // FIXME: Handle custom args. 2988 return false; 2989 } else { 2990 assert(VA.isMemLoc() && "Assuming store on stack."); 2991 2992 // Don't emit stores for undef values. 2993 if (isa<UndefValue>(ArgVal)) 2994 continue; 2995 2996 // Need to store on the stack. 2997 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 2998 2999 unsigned BEAlign = 0; 3000 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3001 BEAlign = 8 - ArgSize; 3002 3003 Address Addr; 3004 Addr.setKind(Address::RegBase); 3005 Addr.setReg(AArch64::SP); 3006 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3007 3008 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 3009 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3010 MachinePointerInfo::getStack(Addr.getOffset()), 3011 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3012 3013 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3014 return false; 3015 } 3016 } 3017 return true; 3018 } 3019 3020 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3021 unsigned NumBytes) { 3022 CallingConv::ID CC = CLI.CallConv; 3023 3024 // Issue CALLSEQ_END 3025 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3026 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3027 .addImm(NumBytes).addImm(0); 3028 3029 // Now the return value. 3030 if (RetVT != MVT::isVoid) { 3031 SmallVector<CCValAssign, 16> RVLocs; 3032 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3033 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3034 3035 // Only handle a single return value. 3036 if (RVLocs.size() != 1) 3037 return false; 3038 3039 // Copy all of the result registers out of their specified physreg. 3040 MVT CopyVT = RVLocs[0].getValVT(); 3041 3042 // TODO: Handle big-endian results 3043 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3044 return false; 3045 3046 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3047 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3048 TII.get(TargetOpcode::COPY), ResultReg) 3049 .addReg(RVLocs[0].getLocReg()); 3050 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3051 3052 CLI.ResultReg = ResultReg; 3053 CLI.NumResultRegs = 1; 3054 } 3055 3056 return true; 3057 } 3058 3059 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3060 CallingConv::ID CC = CLI.CallConv; 3061 bool IsTailCall = CLI.IsTailCall; 3062 bool IsVarArg = CLI.IsVarArg; 3063 const Value *Callee = CLI.Callee; 3064 const char *SymName = CLI.SymName; 3065 3066 if (!Callee && !SymName) 3067 return false; 3068 3069 // Allow SelectionDAG isel to handle tail calls. 3070 if (IsTailCall) 3071 return false; 3072 3073 CodeModel::Model CM = TM.getCodeModel(); 3074 // Only support the small and large code model. 3075 if (CM != CodeModel::Small && CM != CodeModel::Large) 3076 return false; 3077 3078 // FIXME: Add large code model support for ELF. 3079 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3080 return false; 3081 3082 // Let SDISel handle vararg functions. 3083 if (IsVarArg) 3084 return false; 3085 3086 // FIXME: Only handle *simple* calls for now. 3087 MVT RetVT; 3088 if (CLI.RetTy->isVoidTy()) 3089 RetVT = MVT::isVoid; 3090 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3091 return false; 3092 3093 for (auto Flag : CLI.OutFlags) 3094 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal()) 3095 return false; 3096 3097 // Set up the argument vectors. 3098 SmallVector<MVT, 16> OutVTs; 3099 OutVTs.reserve(CLI.OutVals.size()); 3100 3101 for (auto *Val : CLI.OutVals) { 3102 MVT VT; 3103 if (!isTypeLegal(Val->getType(), VT) && 3104 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3105 return false; 3106 3107 // We don't handle vector parameters yet. 3108 if (VT.isVector() || VT.getSizeInBits() > 64) 3109 return false; 3110 3111 OutVTs.push_back(VT); 3112 } 3113 3114 Address Addr; 3115 if (Callee && !computeCallAddress(Callee, Addr)) 3116 return false; 3117 3118 // Handle the arguments now that we've gotten them. 3119 unsigned NumBytes; 3120 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3121 return false; 3122 3123 // Issue the call. 3124 MachineInstrBuilder MIB; 3125 if (CM == CodeModel::Small) { 3126 const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL); 3127 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3128 if (SymName) 3129 MIB.addExternalSymbol(SymName, 0); 3130 else if (Addr.getGlobalValue()) 3131 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3132 else if (Addr.getReg()) { 3133 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3134 MIB.addReg(Reg); 3135 } else 3136 return false; 3137 } else { 3138 unsigned CallReg = 0; 3139 if (SymName) { 3140 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3141 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3142 ADRPReg) 3143 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3144 3145 CallReg = createResultReg(&AArch64::GPR64RegClass); 3146 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), 3147 CallReg) 3148 .addReg(ADRPReg) 3149 .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 3150 AArch64II::MO_NC); 3151 } else if (Addr.getGlobalValue()) 3152 CallReg = materializeGV(Addr.getGlobalValue()); 3153 else if (Addr.getReg()) 3154 CallReg = Addr.getReg(); 3155 3156 if (!CallReg) 3157 return false; 3158 3159 const MCInstrDesc &II = TII.get(AArch64::BLR); 3160 CallReg = constrainOperandRegClass(II, CallReg, 0); 3161 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3162 } 3163 3164 // Add implicit physical register uses to the call. 3165 for (auto Reg : CLI.OutRegs) 3166 MIB.addReg(Reg, RegState::Implicit); 3167 3168 // Add a register mask with the call-preserved registers. 3169 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3170 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3171 3172 CLI.Call = MIB; 3173 3174 // Finish off the call including any return values. 3175 return finishCall(CLI, RetVT, NumBytes); 3176 } 3177 3178 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3179 if (Alignment) 3180 return Len / Alignment <= 4; 3181 else 3182 return Len < 32; 3183 } 3184 3185 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3186 uint64_t Len, unsigned Alignment) { 3187 // Make sure we don't bloat code by inlining very large memcpy's. 3188 if (!isMemCpySmall(Len, Alignment)) 3189 return false; 3190 3191 int64_t UnscaledOffset = 0; 3192 Address OrigDest = Dest; 3193 Address OrigSrc = Src; 3194 3195 while (Len) { 3196 MVT VT; 3197 if (!Alignment || Alignment >= 8) { 3198 if (Len >= 8) 3199 VT = MVT::i64; 3200 else if (Len >= 4) 3201 VT = MVT::i32; 3202 else if (Len >= 2) 3203 VT = MVT::i16; 3204 else { 3205 VT = MVT::i8; 3206 } 3207 } else { 3208 // Bound based on alignment. 3209 if (Len >= 4 && Alignment == 4) 3210 VT = MVT::i32; 3211 else if (Len >= 2 && Alignment == 2) 3212 VT = MVT::i16; 3213 else { 3214 VT = MVT::i8; 3215 } 3216 } 3217 3218 unsigned ResultReg = emitLoad(VT, VT, Src); 3219 if (!ResultReg) 3220 return false; 3221 3222 if (!emitStore(VT, ResultReg, Dest)) 3223 return false; 3224 3225 int64_t Size = VT.getSizeInBits() / 8; 3226 Len -= Size; 3227 UnscaledOffset += Size; 3228 3229 // We need to recompute the unscaled offset for each iteration. 3230 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3231 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3232 } 3233 3234 return true; 3235 } 3236 3237 /// \brief Check if it is possible to fold the condition from the XALU intrinsic 3238 /// into the user. The condition code will only be updated on success. 3239 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3240 const Instruction *I, 3241 const Value *Cond) { 3242 if (!isa<ExtractValueInst>(Cond)) 3243 return false; 3244 3245 const auto *EV = cast<ExtractValueInst>(Cond); 3246 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3247 return false; 3248 3249 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3250 MVT RetVT; 3251 const Function *Callee = II->getCalledFunction(); 3252 Type *RetTy = 3253 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3254 if (!isTypeLegal(RetTy, RetVT)) 3255 return false; 3256 3257 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3258 return false; 3259 3260 const Value *LHS = II->getArgOperand(0); 3261 const Value *RHS = II->getArgOperand(1); 3262 3263 // Canonicalize immediate to the RHS. 3264 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3265 isCommutativeIntrinsic(II)) 3266 std::swap(LHS, RHS); 3267 3268 // Simplify multiplies. 3269 unsigned IID = II->getIntrinsicID(); 3270 switch (IID) { 3271 default: 3272 break; 3273 case Intrinsic::smul_with_overflow: 3274 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3275 if (C->getValue() == 2) 3276 IID = Intrinsic::sadd_with_overflow; 3277 break; 3278 case Intrinsic::umul_with_overflow: 3279 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3280 if (C->getValue() == 2) 3281 IID = Intrinsic::uadd_with_overflow; 3282 break; 3283 } 3284 3285 AArch64CC::CondCode TmpCC; 3286 switch (IID) { 3287 default: 3288 return false; 3289 case Intrinsic::sadd_with_overflow: 3290 case Intrinsic::ssub_with_overflow: 3291 TmpCC = AArch64CC::VS; 3292 break; 3293 case Intrinsic::uadd_with_overflow: 3294 TmpCC = AArch64CC::HS; 3295 break; 3296 case Intrinsic::usub_with_overflow: 3297 TmpCC = AArch64CC::LO; 3298 break; 3299 case Intrinsic::smul_with_overflow: 3300 case Intrinsic::umul_with_overflow: 3301 TmpCC = AArch64CC::NE; 3302 break; 3303 } 3304 3305 // Check if both instructions are in the same basic block. 3306 if (!isValueAvailable(II)) 3307 return false; 3308 3309 // Make sure nothing is in the way 3310 BasicBlock::const_iterator Start = I; 3311 BasicBlock::const_iterator End = II; 3312 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3313 // We only expect extractvalue instructions between the intrinsic and the 3314 // instruction to be selected. 3315 if (!isa<ExtractValueInst>(Itr)) 3316 return false; 3317 3318 // Check that the extractvalue operand comes from the intrinsic. 3319 const auto *EVI = cast<ExtractValueInst>(Itr); 3320 if (EVI->getAggregateOperand() != II) 3321 return false; 3322 } 3323 3324 CC = TmpCC; 3325 return true; 3326 } 3327 3328 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3329 // FIXME: Handle more intrinsics. 3330 switch (II->getIntrinsicID()) { 3331 default: return false; 3332 case Intrinsic::frameaddress: { 3333 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 3334 MFI->setFrameAddressIsTaken(true); 3335 3336 const AArch64RegisterInfo *RegInfo = 3337 static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo()); 3338 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3339 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3340 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3341 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3342 // Recursively load frame address 3343 // ldr x0, [fp] 3344 // ldr x0, [x0] 3345 // ldr x0, [x0] 3346 // ... 3347 unsigned DestReg; 3348 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3349 while (Depth--) { 3350 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3351 SrcReg, /*IsKill=*/true, 0); 3352 assert(DestReg && "Unexpected LDR instruction emission failure."); 3353 SrcReg = DestReg; 3354 } 3355 3356 updateValueMap(II, SrcReg); 3357 return true; 3358 } 3359 case Intrinsic::memcpy: 3360 case Intrinsic::memmove: { 3361 const auto *MTI = cast<MemTransferInst>(II); 3362 // Don't handle volatile. 3363 if (MTI->isVolatile()) 3364 return false; 3365 3366 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3367 // we would emit dead code because we don't currently handle memmoves. 3368 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3369 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3370 // Small memcpy's are common enough that we want to do them without a call 3371 // if possible. 3372 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3373 unsigned Alignment = MTI->getAlignment(); 3374 if (isMemCpySmall(Len, Alignment)) { 3375 Address Dest, Src; 3376 if (!computeAddress(MTI->getRawDest(), Dest) || 3377 !computeAddress(MTI->getRawSource(), Src)) 3378 return false; 3379 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3380 return true; 3381 } 3382 } 3383 3384 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3385 return false; 3386 3387 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3388 // Fast instruction selection doesn't support the special 3389 // address spaces. 3390 return false; 3391 3392 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3393 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); 3394 } 3395 case Intrinsic::memset: { 3396 const MemSetInst *MSI = cast<MemSetInst>(II); 3397 // Don't handle volatile. 3398 if (MSI->isVolatile()) 3399 return false; 3400 3401 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3402 return false; 3403 3404 if (MSI->getDestAddressSpace() > 255) 3405 // Fast instruction selection doesn't support the special 3406 // address spaces. 3407 return false; 3408 3409 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 3410 } 3411 case Intrinsic::sin: 3412 case Intrinsic::cos: 3413 case Intrinsic::pow: { 3414 MVT RetVT; 3415 if (!isTypeLegal(II->getType(), RetVT)) 3416 return false; 3417 3418 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3419 return false; 3420 3421 static const RTLIB::Libcall LibCallTable[3][2] = { 3422 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3423 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3424 { RTLIB::POW_F32, RTLIB::POW_F64 } 3425 }; 3426 RTLIB::Libcall LC; 3427 bool Is64Bit = RetVT == MVT::f64; 3428 switch (II->getIntrinsicID()) { 3429 default: 3430 llvm_unreachable("Unexpected intrinsic."); 3431 case Intrinsic::sin: 3432 LC = LibCallTable[0][Is64Bit]; 3433 break; 3434 case Intrinsic::cos: 3435 LC = LibCallTable[1][Is64Bit]; 3436 break; 3437 case Intrinsic::pow: 3438 LC = LibCallTable[2][Is64Bit]; 3439 break; 3440 } 3441 3442 ArgListTy Args; 3443 Args.reserve(II->getNumArgOperands()); 3444 3445 // Populate the argument list. 3446 for (auto &Arg : II->arg_operands()) { 3447 ArgListEntry Entry; 3448 Entry.Val = Arg; 3449 Entry.Ty = Arg->getType(); 3450 Args.push_back(Entry); 3451 } 3452 3453 CallLoweringInfo CLI; 3454 CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(), 3455 TLI.getLibcallName(LC), std::move(Args)); 3456 if (!lowerCallTo(CLI)) 3457 return false; 3458 updateValueMap(II, CLI.ResultReg); 3459 return true; 3460 } 3461 case Intrinsic::fabs: { 3462 MVT VT; 3463 if (!isTypeLegal(II->getType(), VT)) 3464 return false; 3465 3466 unsigned Opc; 3467 switch (VT.SimpleTy) { 3468 default: 3469 return false; 3470 case MVT::f32: 3471 Opc = AArch64::FABSSr; 3472 break; 3473 case MVT::f64: 3474 Opc = AArch64::FABSDr; 3475 break; 3476 } 3477 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3478 if (!SrcReg) 3479 return false; 3480 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3481 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3482 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3483 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3484 updateValueMap(II, ResultReg); 3485 return true; 3486 } 3487 case Intrinsic::trap: { 3488 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3489 .addImm(1); 3490 return true; 3491 } 3492 case Intrinsic::sqrt: { 3493 Type *RetTy = II->getCalledFunction()->getReturnType(); 3494 3495 MVT VT; 3496 if (!isTypeLegal(RetTy, VT)) 3497 return false; 3498 3499 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3500 if (!Op0Reg) 3501 return false; 3502 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3503 3504 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3505 if (!ResultReg) 3506 return false; 3507 3508 updateValueMap(II, ResultReg); 3509 return true; 3510 } 3511 case Intrinsic::sadd_with_overflow: 3512 case Intrinsic::uadd_with_overflow: 3513 case Intrinsic::ssub_with_overflow: 3514 case Intrinsic::usub_with_overflow: 3515 case Intrinsic::smul_with_overflow: 3516 case Intrinsic::umul_with_overflow: { 3517 // This implements the basic lowering of the xalu with overflow intrinsics. 3518 const Function *Callee = II->getCalledFunction(); 3519 auto *Ty = cast<StructType>(Callee->getReturnType()); 3520 Type *RetTy = Ty->getTypeAtIndex(0U); 3521 3522 MVT VT; 3523 if (!isTypeLegal(RetTy, VT)) 3524 return false; 3525 3526 if (VT != MVT::i32 && VT != MVT::i64) 3527 return false; 3528 3529 const Value *LHS = II->getArgOperand(0); 3530 const Value *RHS = II->getArgOperand(1); 3531 // Canonicalize immediate to the RHS. 3532 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3533 isCommutativeIntrinsic(II)) 3534 std::swap(LHS, RHS); 3535 3536 // Simplify multiplies. 3537 unsigned IID = II->getIntrinsicID(); 3538 switch (IID) { 3539 default: 3540 break; 3541 case Intrinsic::smul_with_overflow: 3542 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3543 if (C->getValue() == 2) { 3544 IID = Intrinsic::sadd_with_overflow; 3545 RHS = LHS; 3546 } 3547 break; 3548 case Intrinsic::umul_with_overflow: 3549 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3550 if (C->getValue() == 2) { 3551 IID = Intrinsic::uadd_with_overflow; 3552 RHS = LHS; 3553 } 3554 break; 3555 } 3556 3557 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3558 AArch64CC::CondCode CC = AArch64CC::Invalid; 3559 switch (IID) { 3560 default: llvm_unreachable("Unexpected intrinsic!"); 3561 case Intrinsic::sadd_with_overflow: 3562 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3563 CC = AArch64CC::VS; 3564 break; 3565 case Intrinsic::uadd_with_overflow: 3566 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3567 CC = AArch64CC::HS; 3568 break; 3569 case Intrinsic::ssub_with_overflow: 3570 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3571 CC = AArch64CC::VS; 3572 break; 3573 case Intrinsic::usub_with_overflow: 3574 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3575 CC = AArch64CC::LO; 3576 break; 3577 case Intrinsic::smul_with_overflow: { 3578 CC = AArch64CC::NE; 3579 unsigned LHSReg = getRegForValue(LHS); 3580 if (!LHSReg) 3581 return false; 3582 bool LHSIsKill = hasTrivialKill(LHS); 3583 3584 unsigned RHSReg = getRegForValue(RHS); 3585 if (!RHSReg) 3586 return false; 3587 bool RHSIsKill = hasTrivialKill(RHS); 3588 3589 if (VT == MVT::i32) { 3590 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3591 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3592 /*IsKill=*/false, 32); 3593 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3594 AArch64::sub_32); 3595 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3596 AArch64::sub_32); 3597 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3598 AArch64_AM::ASR, 31, /*WantResult=*/false); 3599 } else { 3600 assert(VT == MVT::i64 && "Unexpected value type."); 3601 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3602 // reused in the next instruction. 3603 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3604 /*IsKill=*/false); 3605 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3606 RHSReg, RHSIsKill); 3607 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3608 AArch64_AM::ASR, 63, /*WantResult=*/false); 3609 } 3610 break; 3611 } 3612 case Intrinsic::umul_with_overflow: { 3613 CC = AArch64CC::NE; 3614 unsigned LHSReg = getRegForValue(LHS); 3615 if (!LHSReg) 3616 return false; 3617 bool LHSIsKill = hasTrivialKill(LHS); 3618 3619 unsigned RHSReg = getRegForValue(RHS); 3620 if (!RHSReg) 3621 return false; 3622 bool RHSIsKill = hasTrivialKill(RHS); 3623 3624 if (VT == MVT::i32) { 3625 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3626 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3627 /*IsKill=*/false, AArch64_AM::LSR, 32, 3628 /*WantResult=*/false); 3629 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3630 AArch64::sub_32); 3631 } else { 3632 assert(VT == MVT::i64 && "Unexpected value type."); 3633 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3634 // reused in the next instruction. 3635 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3636 /*IsKill=*/false); 3637 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3638 RHSReg, RHSIsKill); 3639 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3640 /*IsKill=*/false, /*WantResult=*/false); 3641 } 3642 break; 3643 } 3644 } 3645 3646 if (MulReg) { 3647 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3648 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3649 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3650 } 3651 3652 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3653 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3654 /*IsKill=*/true, getInvertedCondCode(CC)); 3655 (void)ResultReg2; 3656 assert((ResultReg1 + 1) == ResultReg2 && 3657 "Nonconsecutive result registers."); 3658 updateValueMap(II, ResultReg1, 2); 3659 return true; 3660 } 3661 } 3662 return false; 3663 } 3664 3665 bool AArch64FastISel::selectRet(const Instruction *I) { 3666 const ReturnInst *Ret = cast<ReturnInst>(I); 3667 const Function &F = *I->getParent()->getParent(); 3668 3669 if (!FuncInfo.CanLowerReturn) 3670 return false; 3671 3672 if (F.isVarArg()) 3673 return false; 3674 3675 // Build a list of return value registers. 3676 SmallVector<unsigned, 4> RetRegs; 3677 3678 if (Ret->getNumOperands() > 0) { 3679 CallingConv::ID CC = F.getCallingConv(); 3680 SmallVector<ISD::OutputArg, 4> Outs; 3681 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); 3682 3683 // Analyze operands of the call, assigning locations to each operand. 3684 SmallVector<CCValAssign, 16> ValLocs; 3685 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3686 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3687 : RetCC_AArch64_AAPCS; 3688 CCInfo.AnalyzeReturn(Outs, RetCC); 3689 3690 // Only handle a single return value for now. 3691 if (ValLocs.size() != 1) 3692 return false; 3693 3694 CCValAssign &VA = ValLocs[0]; 3695 const Value *RV = Ret->getOperand(0); 3696 3697 // Don't bother handling odd stuff for now. 3698 if ((VA.getLocInfo() != CCValAssign::Full) && 3699 (VA.getLocInfo() != CCValAssign::BCvt)) 3700 return false; 3701 3702 // Only handle register returns for now. 3703 if (!VA.isRegLoc()) 3704 return false; 3705 3706 unsigned Reg = getRegForValue(RV); 3707 if (Reg == 0) 3708 return false; 3709 3710 unsigned SrcReg = Reg + VA.getValNo(); 3711 unsigned DestReg = VA.getLocReg(); 3712 // Avoid a cross-class copy. This is very unlikely. 3713 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3714 return false; 3715 3716 EVT RVEVT = TLI.getValueType(RV->getType()); 3717 if (!RVEVT.isSimple()) 3718 return false; 3719 3720 // Vectors (of > 1 lane) in big endian need tricky handling. 3721 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && 3722 !Subtarget->isLittleEndian()) 3723 return false; 3724 3725 MVT RVVT = RVEVT.getSimpleVT(); 3726 if (RVVT == MVT::f128) 3727 return false; 3728 3729 MVT DestVT = VA.getValVT(); 3730 // Special handling for extended integers. 3731 if (RVVT != DestVT) { 3732 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3733 return false; 3734 3735 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3736 return false; 3737 3738 bool IsZExt = Outs[0].Flags.isZExt(); 3739 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3740 if (SrcReg == 0) 3741 return false; 3742 } 3743 3744 // Make the copy. 3745 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3746 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3747 3748 // Add register to return instruction. 3749 RetRegs.push_back(VA.getLocReg()); 3750 } 3751 3752 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3753 TII.get(AArch64::RET_ReallyLR)); 3754 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) 3755 MIB.addReg(RetRegs[i], RegState::Implicit); 3756 return true; 3757 } 3758 3759 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3760 Type *DestTy = I->getType(); 3761 Value *Op = I->getOperand(0); 3762 Type *SrcTy = Op->getType(); 3763 3764 EVT SrcEVT = TLI.getValueType(SrcTy, true); 3765 EVT DestEVT = TLI.getValueType(DestTy, true); 3766 if (!SrcEVT.isSimple()) 3767 return false; 3768 if (!DestEVT.isSimple()) 3769 return false; 3770 3771 MVT SrcVT = SrcEVT.getSimpleVT(); 3772 MVT DestVT = DestEVT.getSimpleVT(); 3773 3774 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3775 SrcVT != MVT::i8) 3776 return false; 3777 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3778 DestVT != MVT::i1) 3779 return false; 3780 3781 unsigned SrcReg = getRegForValue(Op); 3782 if (!SrcReg) 3783 return false; 3784 bool SrcIsKill = hasTrivialKill(Op); 3785 3786 // If we're truncating from i64 to a smaller non-legal type then generate an 3787 // AND. Otherwise, we know the high bits are undefined and a truncate only 3788 // generate a COPY. We cannot mark the source register also as result 3789 // register, because this can incorrectly transfer the kill flag onto the 3790 // source register. 3791 unsigned ResultReg; 3792 if (SrcVT == MVT::i64) { 3793 uint64_t Mask = 0; 3794 switch (DestVT.SimpleTy) { 3795 default: 3796 // Trunc i64 to i32 is handled by the target-independent fast-isel. 3797 return false; 3798 case MVT::i1: 3799 Mask = 0x1; 3800 break; 3801 case MVT::i8: 3802 Mask = 0xff; 3803 break; 3804 case MVT::i16: 3805 Mask = 0xffff; 3806 break; 3807 } 3808 // Issue an extract_subreg to get the lower 32-bits. 3809 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 3810 AArch64::sub_32); 3811 // Create the AND instruction which performs the actual truncation. 3812 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); 3813 assert(ResultReg && "Unexpected AND instruction emission failure."); 3814 } else { 3815 ResultReg = createResultReg(&AArch64::GPR32RegClass); 3816 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3817 TII.get(TargetOpcode::COPY), ResultReg) 3818 .addReg(SrcReg, getKillRegState(SrcIsKill)); 3819 } 3820 3821 updateValueMap(I, ResultReg); 3822 return true; 3823 } 3824 3825 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 3826 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 3827 DestVT == MVT::i64) && 3828 "Unexpected value type."); 3829 // Handle i8 and i16 as i32. 3830 if (DestVT == MVT::i8 || DestVT == MVT::i16) 3831 DestVT = MVT::i32; 3832 3833 if (IsZExt) { 3834 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 3835 assert(ResultReg && "Unexpected AND instruction emission failure."); 3836 if (DestVT == MVT::i64) { 3837 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 3838 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 3839 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3840 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3841 TII.get(AArch64::SUBREG_TO_REG), Reg64) 3842 .addImm(0) 3843 .addReg(ResultReg) 3844 .addImm(AArch64::sub_32); 3845 ResultReg = Reg64; 3846 } 3847 return ResultReg; 3848 } else { 3849 if (DestVT == MVT::i64) { 3850 // FIXME: We're SExt i1 to i64. 3851 return 0; 3852 } 3853 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 3854 /*TODO:IsKill=*/false, 0, 0); 3855 } 3856 } 3857 3858 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3859 unsigned Op1, bool Op1IsKill) { 3860 unsigned Opc, ZReg; 3861 switch (RetVT.SimpleTy) { 3862 default: return 0; 3863 case MVT::i8: 3864 case MVT::i16: 3865 case MVT::i32: 3866 RetVT = MVT::i32; 3867 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 3868 case MVT::i64: 3869 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 3870 } 3871 3872 const TargetRegisterClass *RC = 3873 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3874 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, 3875 /*IsKill=*/ZReg, true); 3876 } 3877 3878 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3879 unsigned Op1, bool Op1IsKill) { 3880 if (RetVT != MVT::i64) 3881 return 0; 3882 3883 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 3884 Op0, Op0IsKill, Op1, Op1IsKill, 3885 AArch64::XZR, /*IsKill=*/true); 3886 } 3887 3888 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3889 unsigned Op1, bool Op1IsKill) { 3890 if (RetVT != MVT::i64) 3891 return 0; 3892 3893 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 3894 Op0, Op0IsKill, Op1, Op1IsKill, 3895 AArch64::XZR, /*IsKill=*/true); 3896 } 3897 3898 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 3899 unsigned Op1Reg, bool Op1IsKill) { 3900 unsigned Opc = 0; 3901 bool NeedTrunc = false; 3902 uint64_t Mask = 0; 3903 switch (RetVT.SimpleTy) { 3904 default: return 0; 3905 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 3906 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 3907 case MVT::i32: Opc = AArch64::LSLVWr; break; 3908 case MVT::i64: Opc = AArch64::LSLVXr; break; 3909 } 3910 3911 const TargetRegisterClass *RC = 3912 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3913 if (NeedTrunc) { 3914 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 3915 Op1IsKill = true; 3916 } 3917 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 3918 Op1IsKill); 3919 if (NeedTrunc) 3920 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 3921 return ResultReg; 3922 } 3923 3924 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 3925 bool Op0IsKill, uint64_t Shift, 3926 bool IsZExt) { 3927 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 3928 "Unexpected source/return type pair."); 3929 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 3930 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 3931 "Unexpected source value type."); 3932 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 3933 RetVT == MVT::i64) && "Unexpected return value type."); 3934 3935 bool Is64Bit = (RetVT == MVT::i64); 3936 unsigned RegSize = Is64Bit ? 64 : 32; 3937 unsigned DstBits = RetVT.getSizeInBits(); 3938 unsigned SrcBits = SrcVT.getSizeInBits(); 3939 const TargetRegisterClass *RC = 3940 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3941 3942 // Just emit a copy for "zero" shifts. 3943 if (Shift == 0) { 3944 if (RetVT == SrcVT) { 3945 unsigned ResultReg = createResultReg(RC); 3946 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3947 TII.get(TargetOpcode::COPY), ResultReg) 3948 .addReg(Op0, getKillRegState(Op0IsKill)); 3949 return ResultReg; 3950 } else 3951 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 3952 } 3953 3954 // Don't deal with undefined shifts. 3955 if (Shift >= DstBits) 3956 return 0; 3957 3958 // For immediate shifts we can fold the zero-/sign-extension into the shift. 3959 // {S|U}BFM Wd, Wn, #r, #s 3960 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 3961 3962 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3963 // %2 = shl i16 %1, 4 3964 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 3965 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 3966 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 3967 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 3968 3969 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3970 // %2 = shl i16 %1, 8 3971 // Wd<32+7-24,32-24> = Wn<7:0> 3972 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 3973 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 3974 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 3975 3976 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3977 // %2 = shl i16 %1, 12 3978 // Wd<32+3-20,32-20> = Wn<3:0> 3979 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 3980 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 3981 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 3982 3983 unsigned ImmR = RegSize - Shift; 3984 // Limit the width to the length of the source type. 3985 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 3986 static const unsigned OpcTable[2][2] = { 3987 {AArch64::SBFMWri, AArch64::SBFMXri}, 3988 {AArch64::UBFMWri, AArch64::UBFMXri} 3989 }; 3990 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 3991 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 3992 unsigned TmpReg = MRI.createVirtualRegister(RC); 3993 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3994 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 3995 .addImm(0) 3996 .addReg(Op0, getKillRegState(Op0IsKill)) 3997 .addImm(AArch64::sub_32); 3998 Op0 = TmpReg; 3999 Op0IsKill = true; 4000 } 4001 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4002 } 4003 4004 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4005 unsigned Op1Reg, bool Op1IsKill) { 4006 unsigned Opc = 0; 4007 bool NeedTrunc = false; 4008 uint64_t Mask = 0; 4009 switch (RetVT.SimpleTy) { 4010 default: return 0; 4011 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4012 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4013 case MVT::i32: Opc = AArch64::LSRVWr; break; 4014 case MVT::i64: Opc = AArch64::LSRVXr; break; 4015 } 4016 4017 const TargetRegisterClass *RC = 4018 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4019 if (NeedTrunc) { 4020 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 4021 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4022 Op0IsKill = Op1IsKill = true; 4023 } 4024 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4025 Op1IsKill); 4026 if (NeedTrunc) 4027 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4028 return ResultReg; 4029 } 4030 4031 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4032 bool Op0IsKill, uint64_t Shift, 4033 bool IsZExt) { 4034 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4035 "Unexpected source/return type pair."); 4036 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4037 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4038 "Unexpected source value type."); 4039 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4040 RetVT == MVT::i64) && "Unexpected return value type."); 4041 4042 bool Is64Bit = (RetVT == MVT::i64); 4043 unsigned RegSize = Is64Bit ? 64 : 32; 4044 unsigned DstBits = RetVT.getSizeInBits(); 4045 unsigned SrcBits = SrcVT.getSizeInBits(); 4046 const TargetRegisterClass *RC = 4047 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4048 4049 // Just emit a copy for "zero" shifts. 4050 if (Shift == 0) { 4051 if (RetVT == SrcVT) { 4052 unsigned ResultReg = createResultReg(RC); 4053 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4054 TII.get(TargetOpcode::COPY), ResultReg) 4055 .addReg(Op0, getKillRegState(Op0IsKill)); 4056 return ResultReg; 4057 } else 4058 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4059 } 4060 4061 // Don't deal with undefined shifts. 4062 if (Shift >= DstBits) 4063 return 0; 4064 4065 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4066 // {S|U}BFM Wd, Wn, #r, #s 4067 // Wd<s-r:0> = Wn<s:r> when r <= s 4068 4069 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4070 // %2 = lshr i16 %1, 4 4071 // Wd<7-4:0> = Wn<7:4> 4072 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4073 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4074 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4075 4076 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4077 // %2 = lshr i16 %1, 8 4078 // Wd<7-7,0> = Wn<7:7> 4079 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4080 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4081 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4082 4083 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4084 // %2 = lshr i16 %1, 12 4085 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4086 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4087 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4088 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4089 4090 if (Shift >= SrcBits && IsZExt) 4091 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4092 4093 // It is not possible to fold a sign-extend into the LShr instruction. In this 4094 // case emit a sign-extend. 4095 if (!IsZExt) { 4096 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4097 if (!Op0) 4098 return 0; 4099 Op0IsKill = true; 4100 SrcVT = RetVT; 4101 SrcBits = SrcVT.getSizeInBits(); 4102 IsZExt = true; 4103 } 4104 4105 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4106 unsigned ImmS = SrcBits - 1; 4107 static const unsigned OpcTable[2][2] = { 4108 {AArch64::SBFMWri, AArch64::SBFMXri}, 4109 {AArch64::UBFMWri, AArch64::UBFMXri} 4110 }; 4111 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4112 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4113 unsigned TmpReg = MRI.createVirtualRegister(RC); 4114 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4115 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4116 .addImm(0) 4117 .addReg(Op0, getKillRegState(Op0IsKill)) 4118 .addImm(AArch64::sub_32); 4119 Op0 = TmpReg; 4120 Op0IsKill = true; 4121 } 4122 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4123 } 4124 4125 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4126 unsigned Op1Reg, bool Op1IsKill) { 4127 unsigned Opc = 0; 4128 bool NeedTrunc = false; 4129 uint64_t Mask = 0; 4130 switch (RetVT.SimpleTy) { 4131 default: return 0; 4132 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4133 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4134 case MVT::i32: Opc = AArch64::ASRVWr; break; 4135 case MVT::i64: Opc = AArch64::ASRVXr; break; 4136 } 4137 4138 const TargetRegisterClass *RC = 4139 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4140 if (NeedTrunc) { 4141 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); 4142 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4143 Op0IsKill = Op1IsKill = true; 4144 } 4145 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4146 Op1IsKill); 4147 if (NeedTrunc) 4148 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4149 return ResultReg; 4150 } 4151 4152 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4153 bool Op0IsKill, uint64_t Shift, 4154 bool IsZExt) { 4155 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4156 "Unexpected source/return type pair."); 4157 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4158 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4159 "Unexpected source value type."); 4160 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4161 RetVT == MVT::i64) && "Unexpected return value type."); 4162 4163 bool Is64Bit = (RetVT == MVT::i64); 4164 unsigned RegSize = Is64Bit ? 64 : 32; 4165 unsigned DstBits = RetVT.getSizeInBits(); 4166 unsigned SrcBits = SrcVT.getSizeInBits(); 4167 const TargetRegisterClass *RC = 4168 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4169 4170 // Just emit a copy for "zero" shifts. 4171 if (Shift == 0) { 4172 if (RetVT == SrcVT) { 4173 unsigned ResultReg = createResultReg(RC); 4174 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4175 TII.get(TargetOpcode::COPY), ResultReg) 4176 .addReg(Op0, getKillRegState(Op0IsKill)); 4177 return ResultReg; 4178 } else 4179 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4180 } 4181 4182 // Don't deal with undefined shifts. 4183 if (Shift >= DstBits) 4184 return 0; 4185 4186 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4187 // {S|U}BFM Wd, Wn, #r, #s 4188 // Wd<s-r:0> = Wn<s:r> when r <= s 4189 4190 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4191 // %2 = ashr i16 %1, 4 4192 // Wd<7-4:0> = Wn<7:4> 4193 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4194 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4195 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4196 4197 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4198 // %2 = ashr i16 %1, 8 4199 // Wd<7-7,0> = Wn<7:7> 4200 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4201 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4202 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4203 4204 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4205 // %2 = ashr i16 %1, 12 4206 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4207 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4208 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4209 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4210 4211 if (Shift >= SrcBits && IsZExt) 4212 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4213 4214 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4215 unsigned ImmS = SrcBits - 1; 4216 static const unsigned OpcTable[2][2] = { 4217 {AArch64::SBFMWri, AArch64::SBFMXri}, 4218 {AArch64::UBFMWri, AArch64::UBFMXri} 4219 }; 4220 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4221 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4222 unsigned TmpReg = MRI.createVirtualRegister(RC); 4223 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4224 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4225 .addImm(0) 4226 .addReg(Op0, getKillRegState(Op0IsKill)) 4227 .addImm(AArch64::sub_32); 4228 Op0 = TmpReg; 4229 Op0IsKill = true; 4230 } 4231 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4232 } 4233 4234 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4235 bool IsZExt) { 4236 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4237 4238 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4239 // DestVT are odd things, so test to make sure that they are both types we can 4240 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4241 // bail out to SelectionDAG. 4242 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4243 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4244 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4245 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4246 return 0; 4247 4248 unsigned Opc; 4249 unsigned Imm = 0; 4250 4251 switch (SrcVT.SimpleTy) { 4252 default: 4253 return 0; 4254 case MVT::i1: 4255 return emiti1Ext(SrcReg, DestVT, IsZExt); 4256 case MVT::i8: 4257 if (DestVT == MVT::i64) 4258 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4259 else 4260 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4261 Imm = 7; 4262 break; 4263 case MVT::i16: 4264 if (DestVT == MVT::i64) 4265 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4266 else 4267 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4268 Imm = 15; 4269 break; 4270 case MVT::i32: 4271 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4272 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4273 Imm = 31; 4274 break; 4275 } 4276 4277 // Handle i8 and i16 as i32. 4278 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4279 DestVT = MVT::i32; 4280 else if (DestVT == MVT::i64) { 4281 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4282 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4283 TII.get(AArch64::SUBREG_TO_REG), Src64) 4284 .addImm(0) 4285 .addReg(SrcReg) 4286 .addImm(AArch64::sub_32); 4287 SrcReg = Src64; 4288 } 4289 4290 const TargetRegisterClass *RC = 4291 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4292 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4293 } 4294 4295 static bool isZExtLoad(const MachineInstr *LI) { 4296 switch (LI->getOpcode()) { 4297 default: 4298 return false; 4299 case AArch64::LDURBBi: 4300 case AArch64::LDURHHi: 4301 case AArch64::LDURWi: 4302 case AArch64::LDRBBui: 4303 case AArch64::LDRHHui: 4304 case AArch64::LDRWui: 4305 case AArch64::LDRBBroX: 4306 case AArch64::LDRHHroX: 4307 case AArch64::LDRWroX: 4308 case AArch64::LDRBBroW: 4309 case AArch64::LDRHHroW: 4310 case AArch64::LDRWroW: 4311 return true; 4312 } 4313 } 4314 4315 static bool isSExtLoad(const MachineInstr *LI) { 4316 switch (LI->getOpcode()) { 4317 default: 4318 return false; 4319 case AArch64::LDURSBWi: 4320 case AArch64::LDURSHWi: 4321 case AArch64::LDURSBXi: 4322 case AArch64::LDURSHXi: 4323 case AArch64::LDURSWi: 4324 case AArch64::LDRSBWui: 4325 case AArch64::LDRSHWui: 4326 case AArch64::LDRSBXui: 4327 case AArch64::LDRSHXui: 4328 case AArch64::LDRSWui: 4329 case AArch64::LDRSBWroX: 4330 case AArch64::LDRSHWroX: 4331 case AArch64::LDRSBXroX: 4332 case AArch64::LDRSHXroX: 4333 case AArch64::LDRSWroX: 4334 case AArch64::LDRSBWroW: 4335 case AArch64::LDRSHWroW: 4336 case AArch64::LDRSBXroW: 4337 case AArch64::LDRSHXroW: 4338 case AArch64::LDRSWroW: 4339 return true; 4340 } 4341 } 4342 4343 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4344 MVT SrcVT) { 4345 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4346 if (!LI || !LI->hasOneUse()) 4347 return false; 4348 4349 // Check if the load instruction has already been selected. 4350 unsigned Reg = lookUpRegForValue(LI); 4351 if (!Reg) 4352 return false; 4353 4354 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4355 if (!MI) 4356 return false; 4357 4358 // Check if the correct load instruction has been emitted - SelectionDAG might 4359 // have emitted a zero-extending load, but we need a sign-extending load. 4360 bool IsZExt = isa<ZExtInst>(I); 4361 const auto *LoadMI = MI; 4362 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4363 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4364 unsigned LoadReg = MI->getOperand(1).getReg(); 4365 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4366 assert(LoadMI && "Expected valid instruction"); 4367 } 4368 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4369 return false; 4370 4371 // Nothing to be done. 4372 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4373 updateValueMap(I, Reg); 4374 return true; 4375 } 4376 4377 if (IsZExt) { 4378 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 4379 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4380 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4381 .addImm(0) 4382 .addReg(Reg, getKillRegState(true)) 4383 .addImm(AArch64::sub_32); 4384 Reg = Reg64; 4385 } else { 4386 assert((MI->getOpcode() == TargetOpcode::COPY && 4387 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4388 "Expected copy instruction"); 4389 Reg = MI->getOperand(1).getReg(); 4390 MI->eraseFromParent(); 4391 } 4392 updateValueMap(I, Reg); 4393 return true; 4394 } 4395 4396 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4397 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4398 "Unexpected integer extend instruction."); 4399 MVT RetVT; 4400 MVT SrcVT; 4401 if (!isTypeSupported(I->getType(), RetVT)) 4402 return false; 4403 4404 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4405 return false; 4406 4407 // Try to optimize already sign-/zero-extended values from load instructions. 4408 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4409 return true; 4410 4411 unsigned SrcReg = getRegForValue(I->getOperand(0)); 4412 if (!SrcReg) 4413 return false; 4414 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 4415 4416 // Try to optimize already sign-/zero-extended values from function arguments. 4417 bool IsZExt = isa<ZExtInst>(I); 4418 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4419 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4420 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4421 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); 4422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4423 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4424 .addImm(0) 4425 .addReg(SrcReg, getKillRegState(SrcIsKill)) 4426 .addImm(AArch64::sub_32); 4427 SrcReg = ResultReg; 4428 } 4429 // Conservatively clear all kill flags from all uses, because we are 4430 // replacing a sign-/zero-extend instruction at IR level with a nop at MI 4431 // level. The result of the instruction at IR level might have been 4432 // trivially dead, which is now not longer true. 4433 unsigned UseReg = lookUpRegForValue(I); 4434 if (UseReg) 4435 MRI.clearKillFlags(UseReg); 4436 4437 updateValueMap(I, SrcReg); 4438 return true; 4439 } 4440 } 4441 4442 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4443 if (!ResultReg) 4444 return false; 4445 4446 updateValueMap(I, ResultReg); 4447 return true; 4448 } 4449 4450 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4451 EVT DestEVT = TLI.getValueType(I->getType(), true); 4452 if (!DestEVT.isSimple()) 4453 return false; 4454 4455 MVT DestVT = DestEVT.getSimpleVT(); 4456 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4457 return false; 4458 4459 unsigned DivOpc; 4460 bool Is64bit = (DestVT == MVT::i64); 4461 switch (ISDOpcode) { 4462 default: 4463 return false; 4464 case ISD::SREM: 4465 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; 4466 break; 4467 case ISD::UREM: 4468 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4469 break; 4470 } 4471 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4472 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4473 if (!Src0Reg) 4474 return false; 4475 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4476 4477 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4478 if (!Src1Reg) 4479 return false; 4480 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4481 4482 const TargetRegisterClass *RC = 4483 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4484 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4485 Src1Reg, /*IsKill=*/false); 4486 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4487 // The remainder is computed as numerator - (quotient * denominator) using the 4488 // MSUB instruction. 4489 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4490 Src1Reg, Src1IsKill, Src0Reg, 4491 Src0IsKill); 4492 updateValueMap(I, ResultReg); 4493 return true; 4494 } 4495 4496 bool AArch64FastISel::selectMul(const Instruction *I) { 4497 MVT VT; 4498 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4499 return false; 4500 4501 if (VT.isVector()) 4502 return selectBinaryOp(I, ISD::MUL); 4503 4504 const Value *Src0 = I->getOperand(0); 4505 const Value *Src1 = I->getOperand(1); 4506 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4507 if (C->getValue().isPowerOf2()) 4508 std::swap(Src0, Src1); 4509 4510 // Try to simplify to a shift instruction. 4511 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4512 if (C->getValue().isPowerOf2()) { 4513 uint64_t ShiftVal = C->getValue().logBase2(); 4514 MVT SrcVT = VT; 4515 bool IsZExt = true; 4516 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4517 if (!isIntExtFree(ZExt)) { 4518 MVT VT; 4519 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4520 SrcVT = VT; 4521 IsZExt = true; 4522 Src0 = ZExt->getOperand(0); 4523 } 4524 } 4525 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4526 if (!isIntExtFree(SExt)) { 4527 MVT VT; 4528 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4529 SrcVT = VT; 4530 IsZExt = false; 4531 Src0 = SExt->getOperand(0); 4532 } 4533 } 4534 } 4535 4536 unsigned Src0Reg = getRegForValue(Src0); 4537 if (!Src0Reg) 4538 return false; 4539 bool Src0IsKill = hasTrivialKill(Src0); 4540 4541 unsigned ResultReg = 4542 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4543 4544 if (ResultReg) { 4545 updateValueMap(I, ResultReg); 4546 return true; 4547 } 4548 } 4549 4550 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4551 if (!Src0Reg) 4552 return false; 4553 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4554 4555 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4556 if (!Src1Reg) 4557 return false; 4558 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4559 4560 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4561 4562 if (!ResultReg) 4563 return false; 4564 4565 updateValueMap(I, ResultReg); 4566 return true; 4567 } 4568 4569 bool AArch64FastISel::selectShift(const Instruction *I) { 4570 MVT RetVT; 4571 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4572 return false; 4573 4574 if (RetVT.isVector()) 4575 return selectOperator(I, I->getOpcode()); 4576 4577 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4578 unsigned ResultReg = 0; 4579 uint64_t ShiftVal = C->getZExtValue(); 4580 MVT SrcVT = RetVT; 4581 bool IsZExt = I->getOpcode() != Instruction::AShr; 4582 const Value *Op0 = I->getOperand(0); 4583 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4584 if (!isIntExtFree(ZExt)) { 4585 MVT TmpVT; 4586 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4587 SrcVT = TmpVT; 4588 IsZExt = true; 4589 Op0 = ZExt->getOperand(0); 4590 } 4591 } 4592 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4593 if (!isIntExtFree(SExt)) { 4594 MVT TmpVT; 4595 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4596 SrcVT = TmpVT; 4597 IsZExt = false; 4598 Op0 = SExt->getOperand(0); 4599 } 4600 } 4601 } 4602 4603 unsigned Op0Reg = getRegForValue(Op0); 4604 if (!Op0Reg) 4605 return false; 4606 bool Op0IsKill = hasTrivialKill(Op0); 4607 4608 switch (I->getOpcode()) { 4609 default: llvm_unreachable("Unexpected instruction."); 4610 case Instruction::Shl: 4611 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4612 break; 4613 case Instruction::AShr: 4614 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4615 break; 4616 case Instruction::LShr: 4617 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4618 break; 4619 } 4620 if (!ResultReg) 4621 return false; 4622 4623 updateValueMap(I, ResultReg); 4624 return true; 4625 } 4626 4627 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4628 if (!Op0Reg) 4629 return false; 4630 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4631 4632 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4633 if (!Op1Reg) 4634 return false; 4635 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4636 4637 unsigned ResultReg = 0; 4638 switch (I->getOpcode()) { 4639 default: llvm_unreachable("Unexpected instruction."); 4640 case Instruction::Shl: 4641 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4642 break; 4643 case Instruction::AShr: 4644 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4645 break; 4646 case Instruction::LShr: 4647 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4648 break; 4649 } 4650 4651 if (!ResultReg) 4652 return false; 4653 4654 updateValueMap(I, ResultReg); 4655 return true; 4656 } 4657 4658 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4659 MVT RetVT, SrcVT; 4660 4661 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4662 return false; 4663 if (!isTypeLegal(I->getType(), RetVT)) 4664 return false; 4665 4666 unsigned Opc; 4667 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4668 Opc = AArch64::FMOVWSr; 4669 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4670 Opc = AArch64::FMOVXDr; 4671 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4672 Opc = AArch64::FMOVSWr; 4673 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4674 Opc = AArch64::FMOVDXr; 4675 else 4676 return false; 4677 4678 const TargetRegisterClass *RC = nullptr; 4679 switch (RetVT.SimpleTy) { 4680 default: llvm_unreachable("Unexpected value type."); 4681 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4682 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4683 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4684 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4685 } 4686 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4687 if (!Op0Reg) 4688 return false; 4689 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4690 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4691 4692 if (!ResultReg) 4693 return false; 4694 4695 updateValueMap(I, ResultReg); 4696 return true; 4697 } 4698 4699 bool AArch64FastISel::selectFRem(const Instruction *I) { 4700 MVT RetVT; 4701 if (!isTypeLegal(I->getType(), RetVT)) 4702 return false; 4703 4704 RTLIB::Libcall LC; 4705 switch (RetVT.SimpleTy) { 4706 default: 4707 return false; 4708 case MVT::f32: 4709 LC = RTLIB::REM_F32; 4710 break; 4711 case MVT::f64: 4712 LC = RTLIB::REM_F64; 4713 break; 4714 } 4715 4716 ArgListTy Args; 4717 Args.reserve(I->getNumOperands()); 4718 4719 // Populate the argument list. 4720 for (auto &Arg : I->operands()) { 4721 ArgListEntry Entry; 4722 Entry.Val = Arg; 4723 Entry.Ty = Arg->getType(); 4724 Args.push_back(Entry); 4725 } 4726 4727 CallLoweringInfo CLI; 4728 CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(), 4729 TLI.getLibcallName(LC), std::move(Args)); 4730 if (!lowerCallTo(CLI)) 4731 return false; 4732 updateValueMap(I, CLI.ResultReg); 4733 return true; 4734 } 4735 4736 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4737 MVT VT; 4738 if (!isTypeLegal(I->getType(), VT)) 4739 return false; 4740 4741 if (!isa<ConstantInt>(I->getOperand(1))) 4742 return selectBinaryOp(I, ISD::SDIV); 4743 4744 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4745 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4746 !(C.isPowerOf2() || (-C).isPowerOf2())) 4747 return selectBinaryOp(I, ISD::SDIV); 4748 4749 unsigned Lg2 = C.countTrailingZeros(); 4750 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4751 if (!Src0Reg) 4752 return false; 4753 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4754 4755 if (cast<BinaryOperator>(I)->isExact()) { 4756 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4757 if (!ResultReg) 4758 return false; 4759 updateValueMap(I, ResultReg); 4760 return true; 4761 } 4762 4763 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4764 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4765 if (!AddReg) 4766 return false; 4767 4768 // (Src0 < 0) ? Pow2 - 1 : 0; 4769 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4770 return false; 4771 4772 unsigned SelectOpc; 4773 const TargetRegisterClass *RC; 4774 if (VT == MVT::i64) { 4775 SelectOpc = AArch64::CSELXr; 4776 RC = &AArch64::GPR64RegClass; 4777 } else { 4778 SelectOpc = AArch64::CSELWr; 4779 RC = &AArch64::GPR32RegClass; 4780 } 4781 unsigned SelectReg = 4782 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4783 Src0IsKill, AArch64CC::LT); 4784 if (!SelectReg) 4785 return false; 4786 4787 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4788 // negate the result. 4789 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4790 unsigned ResultReg; 4791 if (C.isNegative()) 4792 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4793 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4794 else 4795 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4796 4797 if (!ResultReg) 4798 return false; 4799 4800 updateValueMap(I, ResultReg); 4801 return true; 4802 } 4803 4804 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4805 /// have to duplicate it for AArch64, because otherwise we would fail during the 4806 /// sign-extend emission. 4807 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4808 unsigned IdxN = getRegForValue(Idx); 4809 if (IdxN == 0) 4810 // Unhandled operand. Halt "fast" selection and bail. 4811 return std::pair<unsigned, bool>(0, false); 4812 4813 bool IdxNIsKill = hasTrivialKill(Idx); 4814 4815 // If the index is smaller or larger than intptr_t, truncate or extend it. 4816 MVT PtrVT = TLI.getPointerTy(); 4817 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4818 if (IdxVT.bitsLT(PtrVT)) { 4819 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); 4820 IdxNIsKill = true; 4821 } else if (IdxVT.bitsGT(PtrVT)) 4822 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4823 return std::pair<unsigned, bool>(IdxN, IdxNIsKill); 4824 } 4825 4826 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4827 /// duplicate it for AArch64, because otherwise we would bail out even for 4828 /// simple cases. This is because the standard fastEmit functions don't cover 4829 /// MUL at all and ADD is lowered very inefficientily. 4830 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { 4831 unsigned N = getRegForValue(I->getOperand(0)); 4832 if (!N) 4833 return false; 4834 bool NIsKill = hasTrivialKill(I->getOperand(0)); 4835 4836 // Keep a running tab of the total offset to coalesce multiple N = N + Offset 4837 // into a single N = N + TotalOffset. 4838 uint64_t TotalOffs = 0; 4839 Type *Ty = I->getOperand(0)->getType(); 4840 MVT VT = TLI.getPointerTy(); 4841 for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) { 4842 const Value *Idx = *OI; 4843 if (auto *StTy = dyn_cast<StructType>(Ty)) { 4844 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); 4845 // N = N + Offset 4846 if (Field) 4847 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); 4848 Ty = StTy->getElementType(Field); 4849 } else { 4850 Ty = cast<SequentialType>(Ty)->getElementType(); 4851 // If this is a constant subscript, handle it quickly. 4852 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4853 if (CI->isZero()) 4854 continue; 4855 // N = N + Offset 4856 TotalOffs += 4857 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 4858 continue; 4859 } 4860 if (TotalOffs) { 4861 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4862 if (!N) 4863 return false; 4864 NIsKill = true; 4865 TotalOffs = 0; 4866 } 4867 4868 // N = N + Idx * ElementSize; 4869 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 4870 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); 4871 unsigned IdxN = Pair.first; 4872 bool IdxNIsKill = Pair.second; 4873 if (!IdxN) 4874 return false; 4875 4876 if (ElementSize != 1) { 4877 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 4878 if (!C) 4879 return false; 4880 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); 4881 if (!IdxN) 4882 return false; 4883 IdxNIsKill = true; 4884 } 4885 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); 4886 if (!N) 4887 return false; 4888 } 4889 } 4890 if (TotalOffs) { 4891 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4892 if (!N) 4893 return false; 4894 } 4895 updateValueMap(I, N); 4896 return true; 4897 } 4898 4899 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 4900 switch (I->getOpcode()) { 4901 default: 4902 break; 4903 case Instruction::Add: 4904 case Instruction::Sub: 4905 return selectAddSub(I); 4906 case Instruction::Mul: 4907 return selectMul(I); 4908 case Instruction::SDiv: 4909 return selectSDiv(I); 4910 case Instruction::SRem: 4911 if (!selectBinaryOp(I, ISD::SREM)) 4912 return selectRem(I, ISD::SREM); 4913 return true; 4914 case Instruction::URem: 4915 if (!selectBinaryOp(I, ISD::UREM)) 4916 return selectRem(I, ISD::UREM); 4917 return true; 4918 case Instruction::Shl: 4919 case Instruction::LShr: 4920 case Instruction::AShr: 4921 return selectShift(I); 4922 case Instruction::And: 4923 case Instruction::Or: 4924 case Instruction::Xor: 4925 return selectLogicalOp(I); 4926 case Instruction::Br: 4927 return selectBranch(I); 4928 case Instruction::IndirectBr: 4929 return selectIndirectBr(I); 4930 case Instruction::BitCast: 4931 if (!FastISel::selectBitCast(I)) 4932 return selectBitCast(I); 4933 return true; 4934 case Instruction::FPToSI: 4935 if (!selectCast(I, ISD::FP_TO_SINT)) 4936 return selectFPToInt(I, /*Signed=*/true); 4937 return true; 4938 case Instruction::FPToUI: 4939 return selectFPToInt(I, /*Signed=*/false); 4940 case Instruction::ZExt: 4941 case Instruction::SExt: 4942 return selectIntExt(I); 4943 case Instruction::Trunc: 4944 if (!selectCast(I, ISD::TRUNCATE)) 4945 return selectTrunc(I); 4946 return true; 4947 case Instruction::FPExt: 4948 return selectFPExt(I); 4949 case Instruction::FPTrunc: 4950 return selectFPTrunc(I); 4951 case Instruction::SIToFP: 4952 if (!selectCast(I, ISD::SINT_TO_FP)) 4953 return selectIntToFP(I, /*Signed=*/true); 4954 return true; 4955 case Instruction::UIToFP: 4956 return selectIntToFP(I, /*Signed=*/false); 4957 case Instruction::Load: 4958 return selectLoad(I); 4959 case Instruction::Store: 4960 return selectStore(I); 4961 case Instruction::FCmp: 4962 case Instruction::ICmp: 4963 return selectCmp(I); 4964 case Instruction::Select: 4965 return selectSelect(I); 4966 case Instruction::Ret: 4967 return selectRet(I); 4968 case Instruction::FRem: 4969 return selectFRem(I); 4970 case Instruction::GetElementPtr: 4971 return selectGetElementPtr(I); 4972 } 4973 4974 // fall-back to target-independent instruction selection. 4975 return selectOperator(I, I->getOpcode()); 4976 // Silence warnings. 4977 (void)&CC_AArch64_DarwinPCS_VarArg; 4978 } 4979 4980 namespace llvm { 4981 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 4982 const TargetLibraryInfo *LibInfo) { 4983 return new AArch64FastISel(FuncInfo, LibInfo); 4984 } 4985 } 4986