1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the ARM-specific support for the FastISel class. Some 11 // of the target-specific code is generated by tablegen in the file 12 // ARMGenFastISel.inc, which is #included here. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMCallingConv.h" 19 #include "ARMRegisterInfo.h" 20 #include "ARMTargetMachine.h" 21 #include "ARMSubtarget.h" 22 #include "ARMConstantPoolValue.h" 23 #include "MCTargetDesc/ARMAddressingModes.h" 24 #include "llvm/CallingConv.h" 25 #include "llvm/DerivedTypes.h" 26 #include "llvm/GlobalVariable.h" 27 #include "llvm/Instructions.h" 28 #include "llvm/IntrinsicInst.h" 29 #include "llvm/Module.h" 30 #include "llvm/Operator.h" 31 #include "llvm/CodeGen/Analysis.h" 32 #include "llvm/CodeGen/FastISel.h" 33 #include "llvm/CodeGen/FunctionLoweringInfo.h" 34 #include "llvm/CodeGen/MachineInstrBuilder.h" 35 #include "llvm/CodeGen/MachineModuleInfo.h" 36 #include "llvm/CodeGen/MachineConstantPool.h" 37 #include "llvm/CodeGen/MachineFrameInfo.h" 38 #include "llvm/CodeGen/MachineMemOperand.h" 39 #include "llvm/CodeGen/MachineRegisterInfo.h" 40 #include "llvm/Support/CallSite.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/GetElementPtrTypeIterator.h" 44 #include "llvm/Target/TargetData.h" 45 #include "llvm/Target/TargetInstrInfo.h" 46 #include "llvm/Target/TargetLowering.h" 47 #include "llvm/Target/TargetMachine.h" 48 #include "llvm/Target/TargetOptions.h" 49 using namespace llvm; 50 51 static cl::opt<bool> 52 DisableARMFastISel("disable-arm-fast-isel", 53 cl::desc("Turn off experimental ARM fast-isel support"), 54 cl::init(false), cl::Hidden); 55 56 extern cl::opt<bool> EnableARMLongCalls; 57 58 namespace { 59 60 // All possible address modes, plus some. 61 typedef struct Address { 62 enum { 63 RegBase, 64 FrameIndexBase 65 } BaseType; 66 67 union { 68 unsigned Reg; 69 int FI; 70 } Base; 71 72 int Offset; 73 74 // Innocuous defaults for our address. 75 Address() 76 : BaseType(RegBase), Offset(0) { 77 Base.Reg = 0; 78 } 79 } Address; 80 81 class ARMFastISel : public FastISel { 82 83 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 84 /// make the right decision when generating code for different targets. 85 const ARMSubtarget *Subtarget; 86 const TargetMachine &TM; 87 const TargetInstrInfo &TII; 88 const TargetLowering &TLI; 89 ARMFunctionInfo *AFI; 90 91 // Convenience variables to avoid some queries. 92 bool isThumb2; 93 LLVMContext *Context; 94 95 public: 96 explicit ARMFastISel(FunctionLoweringInfo &funcInfo) 97 : FastISel(funcInfo), 98 TM(funcInfo.MF->getTarget()), 99 TII(*TM.getInstrInfo()), 100 TLI(*TM.getTargetLowering()) { 101 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 102 AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); 103 isThumb2 = AFI->isThumbFunction(); 104 Context = &funcInfo.Fn->getContext(); 105 } 106 107 // Code from FastISel.cpp. 
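    // Note: these override the generic FastEmitInst_* helpers so that every
    // instruction built by fast-isel for ARM is routed through
    // AddOptionalDefs() below, which appends the default predicate operands
    // (and, where the instruction has one, the optional CC/CPSR def) that
    // most ARM instructions carry as trailing operands.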
108 virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, 109 const TargetRegisterClass *RC); 110 virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, 111 const TargetRegisterClass *RC, 112 unsigned Op0, bool Op0IsKill); 113 virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, 114 const TargetRegisterClass *RC, 115 unsigned Op0, bool Op0IsKill, 116 unsigned Op1, bool Op1IsKill); 117 virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, 118 const TargetRegisterClass *RC, 119 unsigned Op0, bool Op0IsKill, 120 unsigned Op1, bool Op1IsKill, 121 unsigned Op2, bool Op2IsKill); 122 virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, 123 const TargetRegisterClass *RC, 124 unsigned Op0, bool Op0IsKill, 125 uint64_t Imm); 126 virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, 127 const TargetRegisterClass *RC, 128 unsigned Op0, bool Op0IsKill, 129 const ConstantFP *FPImm); 130 virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, 131 const TargetRegisterClass *RC, 132 unsigned Op0, bool Op0IsKill, 133 unsigned Op1, bool Op1IsKill, 134 uint64_t Imm); 135 virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, 136 const TargetRegisterClass *RC, 137 uint64_t Imm); 138 virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, 139 const TargetRegisterClass *RC, 140 uint64_t Imm1, uint64_t Imm2); 141 142 virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, 143 unsigned Op0, bool Op0IsKill, 144 uint32_t Idx); 145 146 // Backend specific FastISel code. 147 virtual bool TargetSelectInstruction(const Instruction *I); 148 virtual unsigned TargetMaterializeConstant(const Constant *C); 149 virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); 150 virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, 151 const LoadInst *LI); 152 153 #include "ARMGenFastISel.inc" 154 155 // Instruction selection routines. 156 private: 157 bool SelectLoad(const Instruction *I); 158 bool SelectStore(const Instruction *I); 159 bool SelectBranch(const Instruction *I); 160 bool SelectCmp(const Instruction *I); 161 bool SelectFPExt(const Instruction *I); 162 bool SelectFPTrunc(const Instruction *I); 163 bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode); 164 bool SelectSIToFP(const Instruction *I); 165 bool SelectFPToSI(const Instruction *I); 166 bool SelectSDiv(const Instruction *I); 167 bool SelectSRem(const Instruction *I); 168 bool SelectCall(const Instruction *I, const char *IntrMemName); 169 bool SelectIntrinsicCall(const IntrinsicInst &I); 170 bool SelectSelect(const Instruction *I); 171 bool SelectRet(const Instruction *I); 172 bool SelectTrunc(const Instruction *I); 173 bool SelectIntExt(const Instruction *I); 174 175 // Utility routines. 
176 private: 177 bool isTypeLegal(Type *Ty, MVT &VT); 178 bool isLoadTypeLegal(Type *Ty, MVT &VT); 179 bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, 180 bool isZExt); 181 bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, 182 unsigned Alignment = 0, bool isZExt = true, 183 bool allocReg = true); 184 185 bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, 186 unsigned Alignment = 0); 187 bool ARMComputeAddress(const Value *Obj, Address &Addr); 188 void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); 189 bool ARMIsMemCpySmall(uint64_t Len); 190 bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); 191 unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); 192 unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); 193 unsigned ARMMaterializeInt(const Constant *C, EVT VT); 194 unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); 195 unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); 196 unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); 197 unsigned ARMSelectCallOp(const GlobalValue *GV); 198 199 // Call handling routines. 200 private: 201 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return); 202 bool ProcessCallArgs(SmallVectorImpl<Value*> &Args, 203 SmallVectorImpl<unsigned> &ArgRegs, 204 SmallVectorImpl<MVT> &ArgVTs, 205 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 206 SmallVectorImpl<unsigned> &RegArgs, 207 CallingConv::ID CC, 208 unsigned &NumBytes); 209 bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 210 const Instruction *I, CallingConv::ID CC, 211 unsigned &NumBytes); 212 bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); 213 214 // OptionalDef handling routines. 215 private: 216 bool isARMNEONPred(const MachineInstr *MI); 217 bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); 218 const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); 219 void AddLoadStoreOperands(EVT VT, Address &Addr, 220 const MachineInstrBuilder &MIB, 221 unsigned Flags, bool useAM3); 222 }; 223 224 } // end anonymous namespace 225 226 #include "ARMGenCallingConv.inc" 227 228 // DefinesOptionalPredicate - This is different from DefinesPredicate in that 229 // we don't care about implicit defs here, just places we'll need to add a 230 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. 231 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { 232 if (!MI->hasOptionalDef()) 233 return false; 234 235 // Look to see if our OptionalDef is defining CPSR or CCR. 236 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 237 const MachineOperand &MO = MI->getOperand(i); 238 if (!MO.isReg() || !MO.isDef()) continue; 239 if (MO.getReg() == ARM::CPSR) 240 *CPSR = true; 241 } 242 return true; 243 } 244 245 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { 246 const MCInstrDesc &MCID = MI->getDesc(); 247 248 // If we're a thumb2 or not NEON function we were handled via isPredicable. 249 if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || 250 AFI->isThumb2Function()) 251 return false; 252 253 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) 254 if (MCID.OpInfo[i].isPredicate()) 255 return true; 256 257 return false; 258 } 259 260 // If the machine is predicable go ahead and add the predicate operands, if 261 // it needs default CC operands add those. 
262 // TODO: If we want to support thumb1 then we'll need to deal with optional 263 // CPSR defs that need to be added before the remaining operands. See s_cc_out 264 // for descriptions why. 265 const MachineInstrBuilder & 266 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { 267 MachineInstr *MI = &*MIB; 268 269 // Do we use a predicate? or... 270 // Are we NEON in ARM mode and have a predicate operand? If so, I know 271 // we're not predicable but add it anyways. 272 if (TII.isPredicable(MI) || isARMNEONPred(MI)) 273 AddDefaultPred(MIB); 274 275 // Do we optionally set a predicate? Preds is size > 0 iff the predicate 276 // defines CPSR. All other OptionalDefines in ARM are the CCR register. 277 bool CPSR = false; 278 if (DefinesOptionalPredicate(MI, &CPSR)) { 279 if (CPSR) 280 AddDefaultT1CC(MIB); 281 else 282 AddDefaultCC(MIB); 283 } 284 return MIB; 285 } 286 287 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, 288 const TargetRegisterClass* RC) { 289 unsigned ResultReg = createResultReg(RC); 290 const MCInstrDesc &II = TII.get(MachineInstOpcode); 291 292 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); 293 return ResultReg; 294 } 295 296 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, 297 const TargetRegisterClass *RC, 298 unsigned Op0, bool Op0IsKill) { 299 unsigned ResultReg = createResultReg(RC); 300 const MCInstrDesc &II = TII.get(MachineInstOpcode); 301 302 if (II.getNumDefs() >= 1) 303 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 304 .addReg(Op0, Op0IsKill * RegState::Kill)); 305 else { 306 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 307 .addReg(Op0, Op0IsKill * RegState::Kill)); 308 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 309 TII.get(TargetOpcode::COPY), ResultReg) 310 .addReg(II.ImplicitDefs[0])); 311 } 312 return ResultReg; 313 } 314 315 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, 316 const TargetRegisterClass *RC, 317 unsigned Op0, bool Op0IsKill, 318 unsigned Op1, bool Op1IsKill) { 319 unsigned ResultReg = createResultReg(RC); 320 const MCInstrDesc &II = TII.get(MachineInstOpcode); 321 322 if (II.getNumDefs() >= 1) 323 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 324 .addReg(Op0, Op0IsKill * RegState::Kill) 325 .addReg(Op1, Op1IsKill * RegState::Kill)); 326 else { 327 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 328 .addReg(Op0, Op0IsKill * RegState::Kill) 329 .addReg(Op1, Op1IsKill * RegState::Kill)); 330 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 331 TII.get(TargetOpcode::COPY), ResultReg) 332 .addReg(II.ImplicitDefs[0])); 333 } 334 return ResultReg; 335 } 336 337 unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, 338 const TargetRegisterClass *RC, 339 unsigned Op0, bool Op0IsKill, 340 unsigned Op1, bool Op1IsKill, 341 unsigned Op2, bool Op2IsKill) { 342 unsigned ResultReg = createResultReg(RC); 343 const MCInstrDesc &II = TII.get(MachineInstOpcode); 344 345 if (II.getNumDefs() >= 1) 346 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 347 .addReg(Op0, Op0IsKill * RegState::Kill) 348 .addReg(Op1, Op1IsKill * RegState::Kill) 349 .addReg(Op2, Op2IsKill * RegState::Kill)); 350 else { 351 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 352 .addReg(Op0, Op0IsKill * RegState::Kill) 353 .addReg(Op1, Op1IsKill * RegState::Kill) 354 .addReg(Op2, Op2IsKill * RegState::Kill)); 355 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 356 TII.get(TargetOpcode::COPY), ResultReg) 357 .addReg(II.ImplicitDefs[0])); 358 } 359 return ResultReg; 360 } 361 362 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, 363 const TargetRegisterClass *RC, 364 unsigned Op0, bool Op0IsKill, 365 uint64_t Imm) { 366 unsigned ResultReg = createResultReg(RC); 367 const MCInstrDesc &II = TII.get(MachineInstOpcode); 368 369 if (II.getNumDefs() >= 1) 370 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 371 .addReg(Op0, Op0IsKill * RegState::Kill) 372 .addImm(Imm)); 373 else { 374 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 375 .addReg(Op0, Op0IsKill * RegState::Kill) 376 .addImm(Imm)); 377 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 378 TII.get(TargetOpcode::COPY), ResultReg) 379 .addReg(II.ImplicitDefs[0])); 380 } 381 return ResultReg; 382 } 383 384 unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, 385 const TargetRegisterClass *RC, 386 unsigned Op0, bool Op0IsKill, 387 const ConstantFP *FPImm) { 388 unsigned ResultReg = createResultReg(RC); 389 const MCInstrDesc &II = TII.get(MachineInstOpcode); 390 391 if (II.getNumDefs() >= 1) 392 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 393 .addReg(Op0, Op0IsKill * RegState::Kill) 394 .addFPImm(FPImm)); 395 else { 396 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 397 .addReg(Op0, Op0IsKill * RegState::Kill) 398 .addFPImm(FPImm)); 399 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 400 TII.get(TargetOpcode::COPY), ResultReg) 401 .addReg(II.ImplicitDefs[0])); 402 } 403 return ResultReg; 404 } 405 406 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, 407 const TargetRegisterClass *RC, 408 unsigned Op0, bool Op0IsKill, 409 unsigned Op1, bool Op1IsKill, 410 uint64_t Imm) { 411 unsigned ResultReg = createResultReg(RC); 412 const MCInstrDesc &II = TII.get(MachineInstOpcode); 413 414 if (II.getNumDefs() >= 1) 415 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 416 .addReg(Op0, Op0IsKill * RegState::Kill) 417 .addReg(Op1, Op1IsKill * RegState::Kill) 418 .addImm(Imm)); 419 else { 420 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 421 .addReg(Op0, Op0IsKill * RegState::Kill) 422 .addReg(Op1, Op1IsKill * RegState::Kill) 423 .addImm(Imm)); 424 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 425 TII.get(TargetOpcode::COPY), ResultReg) 426 .addReg(II.ImplicitDefs[0])); 427 } 428 return ResultReg; 429 } 430 431 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, 432 const TargetRegisterClass *RC, 433 uint64_t Imm) { 434 unsigned ResultReg = createResultReg(RC); 435 const MCInstrDesc &II = TII.get(MachineInstOpcode); 436 437 if (II.getNumDefs() >= 1) 438 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 439 .addImm(Imm)); 440 else { 441 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 442 .addImm(Imm)); 443 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 444 TII.get(TargetOpcode::COPY), ResultReg) 445 .addReg(II.ImplicitDefs[0])); 446 } 447 return ResultReg; 448 } 449 450 unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, 451 const TargetRegisterClass *RC, 452 uint64_t Imm1, uint64_t Imm2) { 453 unsigned ResultReg = createResultReg(RC); 454 const MCInstrDesc &II = TII.get(MachineInstOpcode); 455 456 if (II.getNumDefs() >= 1) 457 
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return false;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
544 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 545 DestReg) 546 .addConstantPoolIndex(Idx) 547 .addReg(0)); 548 return DestReg; 549 } 550 551 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { 552 553 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) 554 return false; 555 556 // If we can do this in a single instruction without a constant pool entry 557 // do so now. 558 const ConstantInt *CI = cast<ConstantInt>(C); 559 if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { 560 unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16; 561 unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 562 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 563 TII.get(Opc), ImmReg) 564 .addImm(CI->getZExtValue())); 565 return ImmReg; 566 } 567 568 // Use MVN to emit negative constants. 569 if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) { 570 unsigned Imm = (unsigned)~(CI->getSExtValue()); 571 bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : 572 (ARM_AM::getSOImmVal(Imm) != -1); 573 if (UseImm) { 574 unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; 575 unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 576 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 577 TII.get(Opc), ImmReg) 578 .addImm(Imm)); 579 return ImmReg; 580 } 581 } 582 583 // Load from constant pool. For now 32-bit only. 584 if (VT != MVT::i32) 585 return false; 586 587 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 588 589 // MachineConstantPool wants an explicit alignment. 590 unsigned Align = TD.getPrefTypeAlignment(C->getType()); 591 if (Align == 0) { 592 // TODO: Figure out if this is correct. 593 Align = TD.getTypeAllocSize(C->getType()); 594 } 595 unsigned Idx = MCP.getConstantPoolIndex(C, Align); 596 597 if (isThumb2) 598 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 599 TII.get(ARM::t2LDRpci), DestReg) 600 .addConstantPoolIndex(Idx)); 601 else 602 // The extra immediate is for addrmode2. 603 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 604 TII.get(ARM::LDRcp), DestReg) 605 .addConstantPoolIndex(Idx) 606 .addImm(0)); 607 608 return DestReg; 609 } 610 611 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { 612 // For now 32-bit only. 613 if (VT != MVT::i32) return 0; 614 615 Reloc::Model RelocM = TM.getRelocationModel(); 616 617 // TODO: Need more magic for ARM PIC. 618 if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; 619 620 // MachineConstantPool wants an explicit alignment. 621 unsigned Align = TD.getPrefTypeAlignment(GV->getType()); 622 if (Align == 0) { 623 // TODO: Figure out if this is correct. 624 Align = TD.getTypeAllocSize(GV->getType()); 625 } 626 627 // Grab index. 628 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); 629 unsigned Id = AFI->createPICLabelUId(); 630 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, 631 ARMCP::CPValue, 632 PCAdj); 633 unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); 634 635 // Load value. 636 MachineInstrBuilder MIB; 637 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 638 if (isThumb2) { 639 unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; 640 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) 641 .addConstantPoolIndex(Idx); 642 if (RelocM == Reloc::PIC_) 643 MIB.addImm(Id); 644 } else { 645 // The extra immediate is for addrmode2. 
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                  DestReg)
          .addConstantPoolIndex(Idx)
          .addImm(0);
  }
  AddOptionalDefs(MIB);

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return false;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
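    // (An alloca from another block is still safe to use here because static
    //  allocas are looked up through FuncInfo.StaticAllocaMap and become a
    //  frame index below rather than a virtual register.)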
747 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 748 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 749 Opcode = I->getOpcode(); 750 U = I; 751 } 752 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 753 Opcode = C->getOpcode(); 754 U = C; 755 } 756 757 if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 758 if (Ty->getAddressSpace() > 255) 759 // Fast instruction selection doesn't support the special 760 // address spaces. 761 return false; 762 763 switch (Opcode) { 764 default: 765 break; 766 case Instruction::BitCast: { 767 // Look through bitcasts. 768 return ARMComputeAddress(U->getOperand(0), Addr); 769 } 770 case Instruction::IntToPtr: { 771 // Look past no-op inttoptrs. 772 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 773 return ARMComputeAddress(U->getOperand(0), Addr); 774 break; 775 } 776 case Instruction::PtrToInt: { 777 // Look past no-op ptrtoints. 778 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 779 return ARMComputeAddress(U->getOperand(0), Addr); 780 break; 781 } 782 case Instruction::GetElementPtr: { 783 Address SavedAddr = Addr; 784 int TmpOffset = Addr.Offset; 785 786 // Iterate through the GEP folding the constants into offsets where 787 // we can. 788 gep_type_iterator GTI = gep_type_begin(U); 789 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); 790 i != e; ++i, ++GTI) { 791 const Value *Op = *i; 792 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 793 const StructLayout *SL = TD.getStructLayout(STy); 794 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 795 TmpOffset += SL->getElementOffset(Idx); 796 } else { 797 uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); 798 for (;;) { 799 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 800 // Constant-offset addressing. 801 TmpOffset += CI->getSExtValue() * S; 802 break; 803 } 804 if (isa<AddOperator>(Op) && 805 (!isa<Instruction>(Op) || 806 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] 807 == FuncInfo.MBB) && 808 isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { 809 // An add (in the same block) with a constant operand. Fold the 810 // constant. 811 ConstantInt *CI = 812 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 813 TmpOffset += CI->getSExtValue() * S; 814 // Iterate on the other operand. 815 Op = cast<AddOperator>(Op)->getOperand(0); 816 continue; 817 } 818 // Unsupported 819 goto unsupported_gep; 820 } 821 } 822 } 823 824 // Try to grab the base operand now. 825 Addr.Offset = TmpOffset; 826 if (ARMComputeAddress(U->getOperand(0), Addr)) return true; 827 828 // We failed, restore everything and try the other options. 829 Addr = SavedAddr; 830 831 unsupported_gep: 832 break; 833 } 834 case Instruction::Alloca: { 835 const AllocaInst *AI = cast<AllocaInst>(Obj); 836 DenseMap<const AllocaInst*, int>::iterator SI = 837 FuncInfo.StaticAllocaMap.find(AI); 838 if (SI != FuncInfo.StaticAllocaMap.end()) { 839 Addr.BaseType = Address::FrameIndexBase; 840 Addr.Base.FI = SI->second; 841 return true; 842 } 843 break; 844 } 845 } 846 847 // Materialize the global variable's address into a reg which can 848 // then be used later to load the variable. 849 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { 850 unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType())); 851 if (Tmp == 0) return false; 852 853 Addr.Base.Reg = Tmp; 854 return true; 855 } 856 857 // Try to get this in a register if nothing else has worked. 
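  // (Any constant offset already folded into Addr.Offset is kept; if it turns
  //  out not to fit the addressing mode, ARMSimplifyAddress() folds it back
  //  into the base register with an ADD.)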
858 if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); 859 return Addr.Base.Reg != 0; 860 } 861 862 void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { 863 864 assert(VT.isSimple() && "Non-simple types are invalid here!"); 865 866 bool needsLowering = false; 867 switch (VT.getSimpleVT().SimpleTy) { 868 default: 869 assert(false && "Unhandled load/store type!"); 870 break; 871 case MVT::i1: 872 case MVT::i8: 873 case MVT::i16: 874 case MVT::i32: 875 if (!useAM3) { 876 // Integer loads/stores handle 12-bit offsets. 877 needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); 878 // Handle negative offsets. 879 if (needsLowering && isThumb2) 880 needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 && 881 Addr.Offset > -256); 882 } else { 883 // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. 884 needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); 885 } 886 break; 887 case MVT::f32: 888 case MVT::f64: 889 // Floating point operands handle 8-bit offsets. 890 needsLowering = ((Addr.Offset & 0xff) != Addr.Offset); 891 break; 892 } 893 894 // If this is a stack pointer and the offset needs to be simplified then 895 // put the alloca address into a register, set the base type back to 896 // register and continue. This should almost never happen. 897 if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { 898 TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass : 899 ARM::GPRRegisterClass; 900 unsigned ResultReg = createResultReg(RC); 901 unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; 902 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 903 TII.get(Opc), ResultReg) 904 .addFrameIndex(Addr.Base.FI) 905 .addImm(0)); 906 Addr.Base.Reg = ResultReg; 907 Addr.BaseType = Address::RegBase; 908 } 909 910 // Since the offset is too large for the load/store instruction 911 // get the reg+offset into a register. 912 if (needsLowering) { 913 Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, 914 /*Op0IsKill*/false, Addr.Offset, MVT::i32); 915 Addr.Offset = 0; 916 } 917 } 918 919 void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, 920 const MachineInstrBuilder &MIB, 921 unsigned Flags, bool useAM3) { 922 // addrmode5 output depends on the selection dag addressing dividing the 923 // offset by 4 that it then later multiplies. Do this here as well. 924 if (VT.getSimpleVT().SimpleTy == MVT::f32 || 925 VT.getSimpleVT().SimpleTy == MVT::f64) 926 Addr.Offset /= 4; 927 928 // Frame base works a bit differently. Handle it separately. 929 if (Addr.BaseType == Address::FrameIndexBase) { 930 int FI = Addr.Base.FI; 931 int Offset = Addr.Offset; 932 MachineMemOperand *MMO = 933 FuncInfo.MF->getMachineMemOperand( 934 MachinePointerInfo::getFixedStack(FI, Offset), 935 Flags, 936 MFI.getObjectSize(FI), 937 MFI.getObjectAlignment(FI)); 938 // Now add the rest of the operands. 939 MIB.addFrameIndex(FI); 940 941 // ARM halfword load/stores and signed byte loads need an additional 942 // operand. 943 if (useAM3) { 944 signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; 945 MIB.addReg(0); 946 MIB.addImm(Imm); 947 } else { 948 MIB.addImm(Addr.Offset); 949 } 950 MIB.addMemOperand(MMO); 951 } else { 952 // Now add the rest of the operands. 953 MIB.addReg(Addr.Base.Reg); 954 955 // ARM halfword load/stores and signed byte loads need an additional 956 // operand. 957 if (useAM3) { 958 signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
  // This is mostly going to be Neon/vector support.
  default: return false;
  case MVT::i1:
  case MVT::i8:
    if (isThumb2) {
      if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
        Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
      else
        Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
    } else {
      if (isZExt) {
        Opc = ARM::LDRBi12;
      } else {
        Opc = ARM::LDRSB;
        useAM3 = true;
      }
    }
    RC = ARM::GPRRegisterClass;
    break;
  case MVT::i16:
    if (isThumb2) {
      if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
        Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
      else
        Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
    } else {
      Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
      useAM3 = true;
    }
    RC = ARM::GPRRegisterClass;
    break;
  case MVT::i32:
    if (isThumb2) {
      if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
        Opc = ARM::t2LDRi8;
      else
        Opc = ARM::t2LDRi12;
    } else {
      Opc = ARM::LDRi12;
    }
    RC = ARM::GPRRegisterClass;
    break;
  case MVT::f32:
    if (!Subtarget->hasVFP2()) return false;
    // Unaligned loads need special handling. Floats require word-alignment.
    if (Alignment && Alignment < 4) {
      needVMOV = true;
      VT = MVT::i32;
      Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = ARM::GPRRegisterClass;
    } else {
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
    }
    break;
  case MVT::f64:
    if (!Subtarget->hasVFP2()) return false;
    // FIXME: Unaligned loads need special handling. Doublewords require
    // word-alignment.
    if (Alignment && Alignment < 4)
      return false;

    Opc = ARM::VLDRD;
    RC = TLI.getRegClassFor(VT);
    break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load.  Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
1071 MVT VT; 1072 if (!isLoadTypeLegal(I->getType(), VT)) 1073 return false; 1074 1075 // See if we can handle this address. 1076 Address Addr; 1077 if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; 1078 1079 unsigned ResultReg; 1080 if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) 1081 return false; 1082 UpdateValueMap(I, ResultReg); 1083 return true; 1084 } 1085 1086 bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, 1087 unsigned Alignment) { 1088 unsigned StrOpc; 1089 bool useAM3 = false; 1090 switch (VT.getSimpleVT().SimpleTy) { 1091 // This is mostly going to be Neon/vector support. 1092 default: return false; 1093 case MVT::i1: { 1094 unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass : 1095 ARM::GPRRegisterClass); 1096 unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; 1097 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1098 TII.get(Opc), Res) 1099 .addReg(SrcReg).addImm(1)); 1100 SrcReg = Res; 1101 } // Fallthrough here. 1102 case MVT::i8: 1103 if (isThumb2) { 1104 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1105 StrOpc = ARM::t2STRBi8; 1106 else 1107 StrOpc = ARM::t2STRBi12; 1108 } else { 1109 StrOpc = ARM::STRBi12; 1110 } 1111 break; 1112 case MVT::i16: 1113 if (isThumb2) { 1114 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1115 StrOpc = ARM::t2STRHi8; 1116 else 1117 StrOpc = ARM::t2STRHi12; 1118 } else { 1119 StrOpc = ARM::STRH; 1120 useAM3 = true; 1121 } 1122 break; 1123 case MVT::i32: 1124 if (isThumb2) { 1125 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1126 StrOpc = ARM::t2STRi8; 1127 else 1128 StrOpc = ARM::t2STRi12; 1129 } else { 1130 StrOpc = ARM::STRi12; 1131 } 1132 break; 1133 case MVT::f32: 1134 if (!Subtarget->hasVFP2()) return false; 1135 // Unaligned stores need special handling. Floats require word-alignment. 1136 if (Alignment && Alignment < 4) { 1137 unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 1138 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1139 TII.get(ARM::VMOVRS), MoveReg) 1140 .addReg(SrcReg)); 1141 SrcReg = MoveReg; 1142 VT = MVT::i32; 1143 StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; 1144 } else { 1145 StrOpc = ARM::VSTRS; 1146 } 1147 break; 1148 case MVT::f64: 1149 if (!Subtarget->hasVFP2()) return false; 1150 // FIXME: Unaligned stores need special handling. Doublewords require 1151 // word-alignment. 1152 if (Alignment && Alignment < 4) 1153 return false; 1154 1155 StrOpc = ARM::VSTRD; 1156 break; 1157 } 1158 // Simplify this down to something we can handle. 1159 ARMSimplifyAddress(Addr, VT, useAM3); 1160 1161 // Create the base instruction, then add the operands. 1162 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1163 TII.get(StrOpc)) 1164 .addReg(SrcReg); 1165 AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); 1166 return true; 1167 } 1168 1169 bool ARMFastISel::SelectStore(const Instruction *I) { 1170 Value *Op0 = I->getOperand(0); 1171 unsigned SrcReg = 0; 1172 1173 // Atomic stores need special handling. 1174 if (cast<StoreInst>(I)->isAtomic()) 1175 return false; 1176 1177 // Verify we have a legal type before going any further. 1178 MVT VT; 1179 if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) 1180 return false; 1181 1182 // Get the value to be stored into a register. 1183 SrcReg = getRegForValue(Op0); 1184 if (SrcReg == 0) return false; 1185 1186 // See if we can handle this address. 
1187 Address Addr; 1188 if (!ARMComputeAddress(I->getOperand(1), Addr)) 1189 return false; 1190 1191 if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) 1192 return false; 1193 return true; 1194 } 1195 1196 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { 1197 switch (Pred) { 1198 // Needs two compares... 1199 case CmpInst::FCMP_ONE: 1200 case CmpInst::FCMP_UEQ: 1201 default: 1202 // AL is our "false" for now. The other two need more compares. 1203 return ARMCC::AL; 1204 case CmpInst::ICMP_EQ: 1205 case CmpInst::FCMP_OEQ: 1206 return ARMCC::EQ; 1207 case CmpInst::ICMP_SGT: 1208 case CmpInst::FCMP_OGT: 1209 return ARMCC::GT; 1210 case CmpInst::ICMP_SGE: 1211 case CmpInst::FCMP_OGE: 1212 return ARMCC::GE; 1213 case CmpInst::ICMP_UGT: 1214 case CmpInst::FCMP_UGT: 1215 return ARMCC::HI; 1216 case CmpInst::FCMP_OLT: 1217 return ARMCC::MI; 1218 case CmpInst::ICMP_ULE: 1219 case CmpInst::FCMP_OLE: 1220 return ARMCC::LS; 1221 case CmpInst::FCMP_ORD: 1222 return ARMCC::VC; 1223 case CmpInst::FCMP_UNO: 1224 return ARMCC::VS; 1225 case CmpInst::FCMP_UGE: 1226 return ARMCC::PL; 1227 case CmpInst::ICMP_SLT: 1228 case CmpInst::FCMP_ULT: 1229 return ARMCC::LT; 1230 case CmpInst::ICMP_SLE: 1231 case CmpInst::FCMP_ULE: 1232 return ARMCC::LE; 1233 case CmpInst::FCMP_UNE: 1234 case CmpInst::ICMP_NE: 1235 return ARMCC::NE; 1236 case CmpInst::ICMP_UGE: 1237 return ARMCC::HS; 1238 case CmpInst::ICMP_ULT: 1239 return ARMCC::LO; 1240 } 1241 } 1242 1243 bool ARMFastISel::SelectBranch(const Instruction *I) { 1244 const BranchInst *BI = cast<BranchInst>(I); 1245 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 1246 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 1247 1248 // Simple branch support. 1249 1250 // If we can, avoid recomputing the compare - redoing it could lead to wonky 1251 // behavior. 1252 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 1253 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { 1254 1255 // Get the compare predicate. 1256 // Try to take advantage of fallthrough opportunities. 1257 CmpInst::Predicate Predicate = CI->getPredicate(); 1258 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 1259 std::swap(TBB, FBB); 1260 Predicate = CmpInst::getInversePredicate(Predicate); 1261 } 1262 1263 ARMCC::CondCodes ARMPred = getComparePred(Predicate); 1264 1265 // We may not handle every CC for now. 1266 if (ARMPred == ARMCC::AL) return false; 1267 1268 // Emit the compare. 1269 if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 1270 return false; 1271 1272 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; 1273 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 1274 .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); 1275 FastEmitBranch(FBB, DL); 1276 FuncInfo.MBB->addSuccessor(TBB); 1277 return true; 1278 } 1279 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { 1280 MVT SourceVT; 1281 if (TI->hasOneUse() && TI->getParent() == I->getParent() && 1282 (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { 1283 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; 1284 unsigned OpReg = getRegForValue(TI->getOperand(0)); 1285 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1286 TII.get(TstOpc)) 1287 .addReg(OpReg).addImm(1)); 1288 1289 unsigned CCMode = ARMCC::NE; 1290 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 1291 std::swap(TBB, FBB); 1292 CCMode = ARMCC::EQ; 1293 } 1294 1295 unsigned BrOpc = isThumb2 ? 
ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register.  Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
      .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcVT = TLI.getValueType(Ty, true);
  if (!SrcVT.isSimple()) return false;

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      if (Imm < 0) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
    // Intentional fall-through.
1393 case MVT::i32: 1394 if (isThumb2) { 1395 if (!UseImm) 1396 CmpOpc = ARM::t2CMPrr; 1397 else 1398 CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; 1399 } else { 1400 if (!UseImm) 1401 CmpOpc = ARM::CMPrr; 1402 else 1403 CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri; 1404 } 1405 break; 1406 } 1407 1408 unsigned SrcReg1 = getRegForValue(Src1Value); 1409 if (SrcReg1 == 0) return false; 1410 1411 unsigned SrcReg2 = 0; 1412 if (!UseImm) { 1413 SrcReg2 = getRegForValue(Src2Value); 1414 if (SrcReg2 == 0) return false; 1415 } 1416 1417 // We have i1, i8, or i16, we need to either zero extend or sign extend. 1418 if (needsExt) { 1419 unsigned ResultReg; 1420 ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); 1421 if (ResultReg == 0) return false; 1422 SrcReg1 = ResultReg; 1423 if (!UseImm) { 1424 ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); 1425 if (ResultReg == 0) return false; 1426 SrcReg2 = ResultReg; 1427 } 1428 } 1429 1430 if (!UseImm) { 1431 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1432 TII.get(CmpOpc)) 1433 .addReg(SrcReg1).addReg(SrcReg2)); 1434 } else { 1435 MachineInstrBuilder MIB; 1436 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1437 .addReg(SrcReg1); 1438 1439 // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. 1440 if (isICmp) 1441 MIB.addImm(Imm); 1442 AddOptionalDefs(MIB); 1443 } 1444 1445 // For floating point we need to move the result to a comparison register 1446 // that we can then use for branches. 1447 if (Ty->isFloatTy() || Ty->isDoubleTy()) 1448 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1449 TII.get(ARM::FMSTAT))); 1450 return true; 1451 } 1452 1453 bool ARMFastISel::SelectCmp(const Instruction *I) { 1454 const CmpInst *CI = cast<CmpInst>(I); 1455 Type *Ty = CI->getOperand(0)->getType(); 1456 1457 // Get the compare predicate. 1458 ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); 1459 1460 // We may not handle every CC for now. 1461 if (ARMPred == ARMCC::AL) return false; 1462 1463 // Emit the compare. 1464 if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 1465 return false; 1466 1467 // Now set a register based on the comparison. Explicitly set the predicates 1468 // here. 1469 unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; 1470 TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass 1471 : ARM::GPRRegisterClass; 1472 unsigned DestReg = createResultReg(RC); 1473 Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); 1474 unsigned ZeroReg = TargetMaterializeConstant(Zero); 1475 bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); 1476 unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR; 1477 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) 1478 .addReg(ZeroReg).addImm(1) 1479 .addImm(ARMPred).addReg(CondReg); 1480 1481 UpdateValueMap(I, DestReg); 1482 return true; 1483 } 1484 1485 bool ARMFastISel::SelectFPExt(const Instruction *I) { 1486 // Make sure we have VFP and that we're extending float to double. 
1487 if (!Subtarget->hasVFP2()) return false; 1488 1489 Value *V = I->getOperand(0); 1490 if (!I->getType()->isDoubleTy() || 1491 !V->getType()->isFloatTy()) return false; 1492 1493 unsigned Op = getRegForValue(V); 1494 if (Op == 0) return false; 1495 1496 unsigned Result = createResultReg(ARM::DPRRegisterClass); 1497 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1498 TII.get(ARM::VCVTDS), Result) 1499 .addReg(Op)); 1500 UpdateValueMap(I, Result); 1501 return true; 1502 } 1503 1504 bool ARMFastISel::SelectFPTrunc(const Instruction *I) { 1505 // Make sure we have VFP and that we're truncating double to float. 1506 if (!Subtarget->hasVFP2()) return false; 1507 1508 Value *V = I->getOperand(0); 1509 if (!(I->getType()->isFloatTy() && 1510 V->getType()->isDoubleTy())) return false; 1511 1512 unsigned Op = getRegForValue(V); 1513 if (Op == 0) return false; 1514 1515 unsigned Result = createResultReg(ARM::SPRRegisterClass); 1516 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1517 TII.get(ARM::VCVTSD), Result) 1518 .addReg(Op)); 1519 UpdateValueMap(I, Result); 1520 return true; 1521 } 1522 1523 bool ARMFastISel::SelectSIToFP(const Instruction *I) { 1524 // Make sure we have VFP. 1525 if (!Subtarget->hasVFP2()) return false; 1526 1527 MVT DstVT; 1528 Type *Ty = I->getType(); 1529 if (!isTypeLegal(Ty, DstVT)) 1530 return false; 1531 1532 Value *Src = I->getOperand(0); 1533 EVT SrcVT = TLI.getValueType(Src->getType(), true); 1534 if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) 1535 return false; 1536 1537 unsigned SrcReg = getRegForValue(Src); 1538 if (SrcReg == 0) return false; 1539 1540 // Handle sign-extension. 1541 if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { 1542 EVT DestVT = MVT::i32; 1543 unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, /*isZExt*/ false); 1544 if (ResultReg == 0) return false; 1545 SrcReg = ResultReg; 1546 } 1547 1548 // The conversion routine works on fp-reg to fp-reg and the operand above 1549 // was an integer, move it to the fp registers if possible. 1550 unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg); 1551 if (FP == 0) return false; 1552 1553 unsigned Opc; 1554 if (Ty->isFloatTy()) Opc = ARM::VSITOS; 1555 else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; 1556 else return false; 1557 1558 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); 1559 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1560 ResultReg) 1561 .addReg(FP)); 1562 UpdateValueMap(I, ResultReg); 1563 return true; 1564 } 1565 1566 bool ARMFastISel::SelectFPToSI(const Instruction *I) { 1567 // Make sure we have VFP. 1568 if (!Subtarget->hasVFP2()) return false; 1569 1570 MVT DstVT; 1571 Type *RetTy = I->getType(); 1572 if (!isTypeLegal(RetTy, DstVT)) 1573 return false; 1574 1575 unsigned Op = getRegForValue(I->getOperand(0)); 1576 if (Op == 0) return false; 1577 1578 unsigned Opc; 1579 Type *OpTy = I->getOperand(0)->getType(); 1580 if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; 1581 else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; 1582 else return false; 1583 1584 // f64->s32 or f32->s32 both need an intermediate f32 reg. 1585 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); 1586 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1587 ResultReg) 1588 .addReg(Op)); 1589 1590 // This result needs to be in an integer register, but the conversion only 1591 // takes place in fp-regs. 
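  // (ARMMoveToIntReg copies the raw 32-bit result out of the S register with
  //  a VMOVRS.)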
1592 unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); 1593 if (IntReg == 0) return false; 1594 1595 UpdateValueMap(I, IntReg); 1596 return true; 1597 } 1598 1599 bool ARMFastISel::SelectSelect(const Instruction *I) { 1600 MVT VT; 1601 if (!isTypeLegal(I->getType(), VT)) 1602 return false; 1603 1604 // Things need to be register sized for register moves. 1605 if (VT != MVT::i32) return false; 1606 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 1607 1608 unsigned CondReg = getRegForValue(I->getOperand(0)); 1609 if (CondReg == 0) return false; 1610 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1611 if (Op1Reg == 0) return false; 1612 1613 // Check to see if we can use an immediate in the conditional move. 1614 int Imm = 0; 1615 bool UseImm = false; 1616 bool isNegativeImm = false; 1617 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { 1618 assert (VT == MVT::i32 && "Expecting an i32."); 1619 Imm = (int)ConstInt->getValue().getZExtValue(); 1620 if (Imm < 0) { 1621 isNegativeImm = true; 1622 Imm = ~Imm; 1623 } 1624 UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : 1625 (ARM_AM::getSOImmVal(Imm) != -1); 1626 } 1627 1628 unsigned Op2Reg = 0; 1629 if (!UseImm) { 1630 Op2Reg = getRegForValue(I->getOperand(2)); 1631 if (Op2Reg == 0) return false; 1632 } 1633 1634 unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; 1635 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1636 .addReg(CondReg).addImm(0)); 1637 1638 unsigned MovCCOpc; 1639 if (!UseImm) { 1640 MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; 1641 } else { 1642 if (!isNegativeImm) { 1643 MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; 1644 } else { 1645 MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; 1646 } 1647 } 1648 unsigned ResultReg = createResultReg(RC); 1649 if (!UseImm) 1650 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1651 .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); 1652 else 1653 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1654 .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); 1655 UpdateValueMap(I, ResultReg); 1656 return true; 1657 } 1658 1659 bool ARMFastISel::SelectSDiv(const Instruction *I) { 1660 MVT VT; 1661 Type *Ty = I->getType(); 1662 if (!isTypeLegal(Ty, VT)) 1663 return false; 1664 1665 // If we have integer div support we should have selected this automagically. 1666 // In case we have a real miss go ahead and return false and we'll pick 1667 // it up later. 1668 if (Subtarget->hasDivide()) return false; 1669 1670 // Otherwise emit a libcall. 
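  // (ARMEmitLibcall emits an ordinary call to the runtime routine that the
  //  target's RTLIB tables associate with this libcall, e.g. the AEABI/libgcc
  //  integer-division helpers.)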
1671 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1672 if (VT == MVT::i8) 1673 LC = RTLIB::SDIV_I8; 1674 else if (VT == MVT::i16) 1675 LC = RTLIB::SDIV_I16; 1676 else if (VT == MVT::i32) 1677 LC = RTLIB::SDIV_I32; 1678 else if (VT == MVT::i64) 1679 LC = RTLIB::SDIV_I64; 1680 else if (VT == MVT::i128) 1681 LC = RTLIB::SDIV_I128; 1682 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); 1683 1684 return ARMEmitLibcall(I, LC); 1685 } 1686 1687 bool ARMFastISel::SelectSRem(const Instruction *I) { 1688 MVT VT; 1689 Type *Ty = I->getType(); 1690 if (!isTypeLegal(Ty, VT)) 1691 return false; 1692 1693 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1694 if (VT == MVT::i8) 1695 LC = RTLIB::SREM_I8; 1696 else if (VT == MVT::i16) 1697 LC = RTLIB::SREM_I16; 1698 else if (VT == MVT::i32) 1699 LC = RTLIB::SREM_I32; 1700 else if (VT == MVT::i64) 1701 LC = RTLIB::SREM_I64; 1702 else if (VT == MVT::i128) 1703 LC = RTLIB::SREM_I128; 1704 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); 1705 1706 return ARMEmitLibcall(I, LC); 1707 } 1708 1709 bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { 1710 EVT VT = TLI.getValueType(I->getType(), true); 1711 1712 // We can get here in the case when we want to use NEON for our fp 1713 // operations, but can't figure out how to. Just use the vfp instructions 1714 // if we have them. 1715 // FIXME: It'd be nice to use NEON instructions. 1716 Type *Ty = I->getType(); 1717 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 1718 if (isFloat && !Subtarget->hasVFP2()) 1719 return false; 1720 1721 unsigned Opc; 1722 bool is64bit = VT == MVT::f64 || VT == MVT::i64; 1723 switch (ISDOpcode) { 1724 default: return false; 1725 case ISD::FADD: 1726 Opc = is64bit ? ARM::VADDD : ARM::VADDS; 1727 break; 1728 case ISD::FSUB: 1729 Opc = is64bit ? ARM::VSUBD : ARM::VSUBS; 1730 break; 1731 case ISD::FMUL: 1732 Opc = is64bit ? ARM::VMULD : ARM::VMULS; 1733 break; 1734 } 1735 unsigned Op1 = getRegForValue(I->getOperand(0)); 1736 if (Op1 == 0) return false; 1737 1738 unsigned Op2 = getRegForValue(I->getOperand(1)); 1739 if (Op2 == 0) return false; 1740 1741 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1742 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1743 TII.get(Opc), ResultReg) 1744 .addReg(Op1).addReg(Op2)); 1745 UpdateValueMap(I, ResultReg); 1746 return true; 1747 } 1748 1749 // Call Handling Code 1750 1751 // This is largely taken directly from CCAssignFnForNode - we don't support 1752 // varargs in FastISel so that part has been removed. 1753 // TODO: We may not support all of this. 1754 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { 1755 switch (CC) { 1756 default: 1757 llvm_unreachable("Unsupported calling convention"); 1758 case CallingConv::Fast: 1759 // Ignore fastcc. Silence compiler warnings. 1760 (void)RetFastCC_ARM_APCS; 1761 (void)FastCC_ARM_APCS; 1762 // Fallthrough 1763 case CallingConv::C: 1764 // Use target triple & subtarget features to do actual dispatch. 1765 if (Subtarget->isAAPCS_ABI()) { 1766 if (Subtarget->hasVFP2() && 1767 TM.Options.FloatABIType == FloatABI::Hard) 1768 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1769 else 1770 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1771 } else 1772 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); 1773 case CallingConv::ARM_AAPCS_VFP: 1774 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1775 case CallingConv::ARM_AAPCS: 1776 return (Return ? 
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
                                         /*isZExt*/false);
      assert(ResultReg != 0 && "Failed to emit a sext");
      Arg = ResultReg;
      ArgVT = DestVT;
      break;
    }
    case CCValAssign::AExt:
      // Intentional fall-through. Handle AExt and ZExt.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
                                         /*isZExt*/true);
      assert(ResultReg != 0 && "Failed to emit a zext");
      Arg = ResultReg;
      ArgVT = DestVT;
      break;
    }
    case CCValAssign::BCvt: {
      unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                               /*TODO: Kill=*/false);
      assert(BC != 0 && "Failed to emit a bitcast!");
      Arg = BC;
      ArgVT = VA.getLocVT();
      break;
    }
    default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64) return false;

      CCValAssign &NextVA = ArgLocs[++i];

      // TODO: Only handle register args for now.
      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
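      // Build an SP-relative address at the offset the calling convention
      // assigned to this argument and reuse ARMEmitStore to do the spill;
      // if ARMEmitStore can't handle the type it returns false and we bail.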
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
    }
  }
  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      EVT DestVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}

bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
                   I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
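    // Values assigned to a stack location fall back to SelectionDAG.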
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVVT = TLI.getValueType(RV->getType());
    EVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      bool isZExt = Outs[0].Flags.isZExt();
      unsigned ResultReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, isZExt);
      if (ResultReg == 0) return false;
      SrcReg = ResultReg;
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(RetOpc)));
  return true;
}

unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
  // iOS needs the r9 versions of the opcodes.
  bool isiOS = Subtarget->isTargetIOS();
  if (isThumb2) {
    return isiOS ? ARM::tBLr9 : ARM::tBL;
  } else {
    return isiOS ? ARM::BLr9 : ARM::BL;
  }
}

// A quick helper that emits a call to the named libcall, using the operands
// of the Instruction I as the call arguments. We can assume that we can emit
// a call for any libcall we can produce. This is an abridged version of the
// full call infrastructure since we won't need to worry about things like
// computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
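  // Libcall arguments are simply the operands of the original instruction
  // (e.g. the two integer operands of an sdiv/srem), so collect a register,
  // type and flags entry for each operand and let ProcessCallArgs assign
  // them to registers or stack slots.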
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLr9 for iOS, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(NULL);
  if (isThumb2)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addExternalSymbol(TLI.getLibcallName(Call));
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
                         .addExternalSymbol(TLI.getLibcallName(Call)));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = 0) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Only handle global variable Callees.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV)
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  // Let SDISel handle vararg functions.
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8 && RetVT != MVT::i1)
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
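  // Unlike the libcall path above, here the arguments come from the call
  // site itself, so also record sext/zext attributes and reject parameter
  // attributes (inreg, sret, nest, byval) that this fast path doesn't handle.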
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last two arguments, which shouldn't be passed to the underlying function.
    if (IntrMemName && e - i <= 2)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLr9 for iOS, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(GV);
  if (isThumb2) {
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)));
    if (!IntrMemName)
      MIB.addGlobalAddress(GV, 0, 0);
    else
      MIB.addExternalSymbol(IntrMemName, 0);
  } else {
    if (!IntrMemName)
      // Explicitly adding the predicate here.
      MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                   TII.get(CallOpc))
                           .addGlobalAddress(GV, 0, 0));
    else
      // Explicitly adding the predicate here.
      MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                   TII.get(CallOpc))
                           .addExternalSymbol(IntrMemName, 0));
  }

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  return Len <= 16;
}

bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
                                        uint64_t Len) {
  // Make sure we don't bloat code by inlining very large memcpy's.
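  // The loop below walks the copy in i32/i16/i8 sized chunks, emitting a
  // load/store pair per chunk. Roughly, a 7-byte copy becomes a 4-byte, then
  // a 2-byte, then a 1-byte load/store sequence (ldr/str, ldrh/strh,
  // ldrb/strb or their Thumb2 equivalents, depending on what ARMEmitLoad and
  // ARMEmitStore pick for the address).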
  if (!ARMIsMemCpySmall(Len))
    return false;

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else {
      assert(Len == 1);
      VT = MVT::i8;
    }

    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV; // Silence set-but-unused warnings in release builds.

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}

bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a
      // call if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  }
  return false;
}

bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(Op->getType(), true);
  DestVT = TLI.getValueType(I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
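  // Simply reuse the source register as the result; consumers of the
  // narrower value are responsible for ignoring the stale high bits.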
  UpdateValueMap(I, SrcReg);
  return true;
}

unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;

  unsigned Opc;
  bool isBoolZext = false;
  if (!SrcVT.isSimple()) return 0;
  switch (SrcVT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i16:
    if (!Subtarget->hasV6Ops()) return 0;
    if (isZExt)
      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
    else
      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
    break;
  case MVT::i8:
    if (!Subtarget->hasV6Ops()) return 0;
    if (isZExt)
      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
    else
      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
    break;
  case MVT::i1:
    if (isZExt) {
      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      isBoolZext = true;
      break;
    }
    return 0;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
        .addReg(SrcReg);
  if (isBoolZext)
    MIB.addImm(1);
  else
    MIB.addImm(0);
  AddOptionalDefs(MIB);
  return ResultReg;
}

bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(SrcTy, true);
  DestVT = TLI.getValueType(DestTy, true);

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

// TODO: SoftFP support.
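/// TargetSelectInstruction - Dispatch on the IR opcode to the Select*
/// routines above. Returning false here tells the common FastISel driver to
/// fall back to SelectionDAG for this instruction.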
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::Load:
    return SelectLoad(I);
  case Instruction::Store:
    return SelectStore(I);
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return SelectCmp(I);
  case Instruction::FPExt:
    return SelectFPExt(I);
  case Instruction::FPTrunc:
    return SelectFPTrunc(I);
  case Instruction::SIToFP:
    return SelectSIToFP(I);
  case Instruction::FPToSI:
    return SelectFPToSI(I);
  case Instruction::FAdd:
    return SelectBinaryOp(I, ISD::FADD);
  case Instruction::FSub:
    return SelectBinaryOp(I, ISD::FSUB);
  case Instruction::FMul:
    return SelectBinaryOp(I, ISD::FMUL);
  case Instruction::SDiv:
    return SelectSDiv(I);
  case Instruction::SRem:
    return SelectSRem(I);
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      return SelectIntrinsicCall(*II);
    return SelectCall(I);
  case Instruction::Select:
    return SelectSelect(I);
  case Instruction::Ret:
    return SelectRet(I);
  case Instruction::Trunc:
    return SelectTrunc(I);
  case Instruction::ZExt:
  case Instruction::SExt:
    return SelectIntExt(I);
  default: break;
  }
  return false;
}

/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                                const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  bool isZExt = true;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::SXTH:
  case ARM::t2SXTH:
    isZExt = false;
    // Intentional fall-through: the width check below is shared with UXTH.
  case ARM::UXTH:
  case ARM::t2UXTH:
    if (VT != MVT::i16)
      return false;
    break;
  case ARM::SXTB:
  case ARM::t2SXTB:
    isZExt = false;
    // Intentional fall-through: the width check below is shared with UXTB.
  case ARM::UXTB:
  case ARM::t2UXTB:
    if (VT != MVT::i8)
      return false;
    break;
  }
  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  unsigned ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MI->eraseFromParent();
  return true;
}

namespace llvm {
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    // Completely untested on non-iOS.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // For now, restrict fast-isel to iOS targets that are not Thumb1-only.
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
        !DisableARMFastISel)
      return new ARMFastISel(funcInfo);
    return 0;
  }
}