1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the ARM-specific support for the FastISel class. Some 11 // of the target-specific code is generated by tablegen in the file 12 // ARMGenFastISel.inc, which is #included here. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMCallingConv.h" 19 #include "ARMRegisterInfo.h" 20 #include "ARMTargetMachine.h" 21 #include "ARMSubtarget.h" 22 #include "ARMConstantPoolValue.h" 23 #include "llvm/CallingConv.h" 24 #include "llvm/DerivedTypes.h" 25 #include "llvm/GlobalVariable.h" 26 #include "llvm/Instructions.h" 27 #include "llvm/IntrinsicInst.h" 28 #include "llvm/Module.h" 29 #include "llvm/Operator.h" 30 #include "llvm/CodeGen/Analysis.h" 31 #include "llvm/CodeGen/FastISel.h" 32 #include "llvm/CodeGen/FunctionLoweringInfo.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineModuleInfo.h" 35 #include "llvm/CodeGen/MachineConstantPool.h" 36 #include "llvm/CodeGen/MachineFrameInfo.h" 37 #include "llvm/CodeGen/MachineMemOperand.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/CodeGen/PseudoSourceValue.h" 40 #include "llvm/Support/CallSite.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/GetElementPtrTypeIterator.h" 44 #include "llvm/Target/TargetData.h" 45 #include "llvm/Target/TargetInstrInfo.h" 46 #include "llvm/Target/TargetLowering.h" 47 #include "llvm/Target/TargetMachine.h" 48 #include "llvm/Target/TargetOptions.h" 49 using namespace llvm; 50 51 static cl::opt<bool> 52 DisableARMFastISel("disable-arm-fast-isel", 53 cl::desc("Turn off experimental ARM fast-isel support"), 54 cl::init(false), cl::Hidden); 55 56 extern cl::opt<bool> EnableARMLongCalls; 57 58 namespace { 59 60 // All possible address modes, plus some. 61 typedef struct Address { 62 enum { 63 RegBase, 64 FrameIndexBase 65 } BaseType; 66 67 union { 68 unsigned Reg; 69 int FI; 70 } Base; 71 72 int Offset; 73 unsigned Scale; 74 unsigned PlusReg; 75 76 // Innocuous defaults for our address. 77 Address() 78 : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) { 79 Base.Reg = 0; 80 } 81 } Address; 82 83 class ARMFastISel : public FastISel { 84 85 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 86 /// make the right decision when generating code for different targets. 87 const ARMSubtarget *Subtarget; 88 const TargetMachine &TM; 89 const TargetInstrInfo &TII; 90 const TargetLowering &TLI; 91 ARMFunctionInfo *AFI; 92 93 // Convenience variables to avoid some queries. 94 bool isThumb; 95 LLVMContext *Context; 96 97 public: 98 explicit ARMFastISel(FunctionLoweringInfo &funcInfo) 99 : FastISel(funcInfo), 100 TM(funcInfo.MF->getTarget()), 101 TII(*TM.getInstrInfo()), 102 TLI(*TM.getTargetLowering()) { 103 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 104 AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); 105 isThumb = AFI->isThumbFunction(); 106 Context = &funcInfo.Fn->getContext(); 107 } 108 109 // Code from FastISel.cpp. 110 virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, 111 const TargetRegisterClass *RC); 112 virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, 113 const TargetRegisterClass *RC, 114 unsigned Op0, bool Op0IsKill); 115 virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, 116 const TargetRegisterClass *RC, 117 unsigned Op0, bool Op0IsKill, 118 unsigned Op1, bool Op1IsKill); 119 virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, 120 const TargetRegisterClass *RC, 121 unsigned Op0, bool Op0IsKill, 122 unsigned Op1, bool Op1IsKill, 123 unsigned Op2, bool Op2IsKill); 124 virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, 125 const TargetRegisterClass *RC, 126 unsigned Op0, bool Op0IsKill, 127 uint64_t Imm); 128 virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, 129 const TargetRegisterClass *RC, 130 unsigned Op0, bool Op0IsKill, 131 const ConstantFP *FPImm); 132 virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, 133 const TargetRegisterClass *RC, 134 unsigned Op0, bool Op0IsKill, 135 unsigned Op1, bool Op1IsKill, 136 uint64_t Imm); 137 virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, 138 const TargetRegisterClass *RC, 139 uint64_t Imm); 140 141 virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, 142 unsigned Op0, bool Op0IsKill, 143 uint32_t Idx); 144 145 // Backend specific FastISel code. 146 virtual bool TargetSelectInstruction(const Instruction *I); 147 virtual unsigned TargetMaterializeConstant(const Constant *C); 148 virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); 149 150 #include "ARMGenFastISel.inc" 151 152 // Instruction selection routines. 153 private: 154 bool SelectLoad(const Instruction *I); 155 bool SelectStore(const Instruction *I); 156 bool SelectBranch(const Instruction *I); 157 bool SelectCmp(const Instruction *I); 158 bool SelectFPExt(const Instruction *I); 159 bool SelectFPTrunc(const Instruction *I); 160 bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode); 161 bool SelectSIToFP(const Instruction *I); 162 bool SelectFPToSI(const Instruction *I); 163 bool SelectSDiv(const Instruction *I); 164 bool SelectSRem(const Instruction *I); 165 bool SelectCall(const Instruction *I); 166 bool SelectSelect(const Instruction *I); 167 bool SelectRet(const Instruction *I); 168 169 // Utility routines. 170 private: 171 bool isTypeLegal(const Type *Ty, MVT &VT); 172 bool isLoadTypeLegal(const Type *Ty, MVT &VT); 173 bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); 174 bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); 175 bool ARMComputeAddress(const Value *Obj, Address &Addr); 176 void ARMSimplifyAddress(Address &Addr, EVT VT); 177 unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); 178 unsigned ARMMaterializeInt(const Constant *C, EVT VT); 179 unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); 180 unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); 181 unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); 182 unsigned ARMSelectCallOp(const GlobalValue *GV); 183 184 // Call handling routines. 185 private: 186 bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, 187 unsigned &ResultReg); 188 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return); 189 bool ProcessCallArgs(SmallVectorImpl<Value*> &Args, 190 SmallVectorImpl<unsigned> &ArgRegs, 191 SmallVectorImpl<MVT> &ArgVTs, 192 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 193 SmallVectorImpl<unsigned> &RegArgs, 194 CallingConv::ID CC, 195 unsigned &NumBytes); 196 bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 197 const Instruction *I, CallingConv::ID CC, 198 unsigned &NumBytes); 199 bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); 200 201 // OptionalDef handling routines. 202 private: 203 bool isARMNEONPred(const MachineInstr *MI); 204 bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); 205 const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); 206 void AddLoadStoreOperands(EVT VT, Address &Addr, 207 const MachineInstrBuilder &MIB); 208 }; 209 210 } // end anonymous namespace 211 212 #include "ARMGenCallingConv.inc" 213 214 // DefinesOptionalPredicate - This is different from DefinesPredicate in that 215 // we don't care about implicit defs here, just places we'll need to add a 216 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. 217 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { 218 const TargetInstrDesc &TID = MI->getDesc(); 219 if (!TID.hasOptionalDef()) 220 return false; 221 222 // Look to see if our OptionalDef is defining CPSR or CCR. 223 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 224 const MachineOperand &MO = MI->getOperand(i); 225 if (!MO.isReg() || !MO.isDef()) continue; 226 if (MO.getReg() == ARM::CPSR) 227 *CPSR = true; 228 } 229 return true; 230 } 231 232 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { 233 const TargetInstrDesc &TID = MI->getDesc(); 234 235 // If we're a thumb2 or not NEON function we were handled via isPredicable. 236 if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || 237 AFI->isThumb2Function()) 238 return false; 239 240 for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) 241 if (TID.OpInfo[i].isPredicate()) 242 return true; 243 244 return false; 245 } 246 247 // If the machine is predicable go ahead and add the predicate operands, if 248 // it needs default CC operands add those. 249 // TODO: If we want to support thumb1 then we'll need to deal with optional 250 // CPSR defs that need to be added before the remaining operands. See s_cc_out 251 // for descriptions why. 252 const MachineInstrBuilder & 253 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { 254 MachineInstr *MI = &*MIB; 255 256 // Do we use a predicate? or... 257 // Are we NEON in ARM mode and have a predicate operand? If so, I know 258 // we're not predicable but add it anyways. 259 if (TII.isPredicable(MI) || isARMNEONPred(MI)) 260 AddDefaultPred(MIB); 261 262 // Do we optionally set a predicate? Preds is size > 0 iff the predicate 263 // defines CPSR. All other OptionalDefines in ARM are the CCR register. 264 bool CPSR = false; 265 if (DefinesOptionalPredicate(MI, &CPSR)) { 266 if (CPSR) 267 AddDefaultT1CC(MIB); 268 else 269 AddDefaultCC(MIB); 270 } 271 return MIB; 272 } 273 274 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, 275 const TargetRegisterClass* RC) { 276 unsigned ResultReg = createResultReg(RC); 277 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 278 279 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); 280 return ResultReg; 281 } 282 283 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, 284 const TargetRegisterClass *RC, 285 unsigned Op0, bool Op0IsKill) { 286 unsigned ResultReg = createResultReg(RC); 287 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 288 289 if (II.getNumDefs() >= 1) 290 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 291 .addReg(Op0, Op0IsKill * RegState::Kill)); 292 else { 293 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 294 .addReg(Op0, Op0IsKill * RegState::Kill)); 295 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 296 TII.get(TargetOpcode::COPY), ResultReg) 297 .addReg(II.ImplicitDefs[0])); 298 } 299 return ResultReg; 300 } 301 302 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, 303 const TargetRegisterClass *RC, 304 unsigned Op0, bool Op0IsKill, 305 unsigned Op1, bool Op1IsKill) { 306 unsigned ResultReg = createResultReg(RC); 307 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 308 309 if (II.getNumDefs() >= 1) 310 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 311 .addReg(Op0, Op0IsKill * RegState::Kill) 312 .addReg(Op1, Op1IsKill * RegState::Kill)); 313 else { 314 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 315 .addReg(Op0, Op0IsKill * RegState::Kill) 316 .addReg(Op1, Op1IsKill * RegState::Kill)); 317 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 318 TII.get(TargetOpcode::COPY), ResultReg) 319 .addReg(II.ImplicitDefs[0])); 320 } 321 return ResultReg; 322 } 323 324 unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, 325 const TargetRegisterClass *RC, 326 unsigned Op0, bool Op0IsKill, 327 unsigned Op1, bool Op1IsKill, 328 unsigned Op2, bool Op2IsKill) { 329 unsigned ResultReg = createResultReg(RC); 330 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 331 332 if (II.getNumDefs() >= 1) 333 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 334 .addReg(Op0, Op0IsKill * RegState::Kill) 335 .addReg(Op1, Op1IsKill * RegState::Kill) 336 .addReg(Op2, Op2IsKill * RegState::Kill)); 337 else { 338 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 339 .addReg(Op0, Op0IsKill * RegState::Kill) 340 .addReg(Op1, Op1IsKill * RegState::Kill) 341 .addReg(Op2, Op2IsKill * RegState::Kill)); 342 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 343 TII.get(TargetOpcode::COPY), ResultReg) 344 .addReg(II.ImplicitDefs[0])); 345 } 346 return ResultReg; 347 } 348 349 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, 350 const TargetRegisterClass *RC, 351 unsigned Op0, bool Op0IsKill, 352 uint64_t Imm) { 353 unsigned ResultReg = createResultReg(RC); 354 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 355 356 if (II.getNumDefs() >= 1) 357 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 358 .addReg(Op0, Op0IsKill * RegState::Kill) 359 .addImm(Imm)); 360 else { 361 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 362 .addReg(Op0, Op0IsKill * RegState::Kill) 363 .addImm(Imm)); 364 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 365 TII.get(TargetOpcode::COPY), ResultReg) 366 .addReg(II.ImplicitDefs[0])); 367 } 368 return ResultReg; 369 } 370 371 unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, 372 const TargetRegisterClass *RC, 373 unsigned Op0, bool Op0IsKill, 374 const ConstantFP *FPImm) { 375 unsigned ResultReg = createResultReg(RC); 376 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 377 378 if (II.getNumDefs() >= 1) 379 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 380 .addReg(Op0, Op0IsKill * RegState::Kill) 381 .addFPImm(FPImm)); 382 else { 383 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 384 .addReg(Op0, Op0IsKill * RegState::Kill) 385 .addFPImm(FPImm)); 386 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 387 TII.get(TargetOpcode::COPY), ResultReg) 388 .addReg(II.ImplicitDefs[0])); 389 } 390 return ResultReg; 391 } 392 393 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, 394 const TargetRegisterClass *RC, 395 unsigned Op0, bool Op0IsKill, 396 unsigned Op1, bool Op1IsKill, 397 uint64_t Imm) { 398 unsigned ResultReg = createResultReg(RC); 399 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 400 401 if (II.getNumDefs() >= 1) 402 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 403 .addReg(Op0, Op0IsKill * RegState::Kill) 404 .addReg(Op1, Op1IsKill * RegState::Kill) 405 .addImm(Imm)); 406 else { 407 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 408 .addReg(Op0, Op0IsKill * RegState::Kill) 409 .addReg(Op1, Op1IsKill * RegState::Kill) 410 .addImm(Imm)); 411 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 412 TII.get(TargetOpcode::COPY), ResultReg) 413 .addReg(II.ImplicitDefs[0])); 414 } 415 return ResultReg; 416 } 417 418 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, 419 const TargetRegisterClass *RC, 420 uint64_t Imm) { 421 unsigned ResultReg = createResultReg(RC); 422 const TargetInstrDesc &II = TII.get(MachineInstOpcode); 423 424 if (II.getNumDefs() >= 1) 425 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 426 .addImm(Imm)); 427 else { 428 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 429 .addImm(Imm)); 430 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 431 TII.get(TargetOpcode::COPY), ResultReg) 432 .addReg(II.ImplicitDefs[0])); 433 } 434 return ResultReg; 435 } 436 437 unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, 438 unsigned Op0, bool Op0IsKill, 439 uint32_t Idx) { 440 unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); 441 assert(TargetRegisterInfo::isVirtualRegister(Op0) && 442 "Cannot yet extract from physregs"); 443 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 444 DL, TII.get(TargetOpcode::COPY), ResultReg) 445 .addReg(Op0, getKillRegState(Op0IsKill), Idx)); 446 return ResultReg; 447 } 448 449 // TODO: Don't worry about 64-bit now, but when this is fixed remove the 450 // checks from the various callers. 451 unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { 452 if (VT == MVT::f64) return 0; 453 454 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 455 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 456 TII.get(ARM::VMOVRS), MoveReg) 457 .addReg(SrcReg)); 458 return MoveReg; 459 } 460 461 unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { 462 if (VT == MVT::i64) return 0; 463 464 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 465 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 466 TII.get(ARM::VMOVSR), MoveReg) 467 .addReg(SrcReg)); 468 return MoveReg; 469 } 470 471 // For double width floating point we need to materialize two constants 472 // (the high and the low) into integer registers then use a move to get 473 // the combined constant into an FP reg. 474 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { 475 const APFloat Val = CFP->getValueAPF(); 476 bool is64bit = VT == MVT::f64; 477 478 // This checks to see if we can use VFP3 instructions to materialize 479 // a constant, otherwise we have to go through the constant pool. 480 if (TLI.isFPImmLegal(Val, VT)) { 481 unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; 482 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 483 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 484 DestReg) 485 .addFPImm(CFP)); 486 return DestReg; 487 } 488 489 // Require VFP2 for loading fp constants. 490 if (!Subtarget->hasVFP2()) return false; 491 492 // MachineConstantPool wants an explicit alignment. 493 unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); 494 if (Align == 0) { 495 // TODO: Figure out if this is correct. 496 Align = TD.getTypeAllocSize(CFP->getType()); 497 } 498 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 499 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 500 unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS; 501 502 // The extra reg is for addrmode5. 503 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 504 DestReg) 505 .addConstantPoolIndex(Idx) 506 .addReg(0)); 507 return DestReg; 508 } 509 510 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { 511 512 // For now 32-bit only. 513 if (VT != MVT::i32) return false; 514 515 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 516 517 // If we can do this in a single instruction without a constant pool entry 518 // do so now. 519 const ConstantInt *CI = cast<ConstantInt>(C); 520 if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) { 521 unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16; 522 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 523 TII.get(Opc), DestReg) 524 .addImm(CI->getSExtValue())); 525 return DestReg; 526 } 527 528 // MachineConstantPool wants an explicit alignment. 529 unsigned Align = TD.getPrefTypeAlignment(C->getType()); 530 if (Align == 0) { 531 // TODO: Figure out if this is correct. 532 Align = TD.getTypeAllocSize(C->getType()); 533 } 534 unsigned Idx = MCP.getConstantPoolIndex(C, Align); 535 536 if (isThumb) 537 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 538 TII.get(ARM::t2LDRpci), DestReg) 539 .addConstantPoolIndex(Idx)); 540 else 541 // The extra immediate is for addrmode2. 542 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 543 TII.get(ARM::LDRcp), DestReg) 544 .addConstantPoolIndex(Idx) 545 .addImm(0)); 546 547 return DestReg; 548 } 549 550 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { 551 // For now 32-bit only. 552 if (VT != MVT::i32) return 0; 553 554 Reloc::Model RelocM = TM.getRelocationModel(); 555 556 // TODO: No external globals for now. 557 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0; 558 559 // TODO: Need more magic for ARM PIC. 560 if (!isThumb && (RelocM == Reloc::PIC_)) return 0; 561 562 // MachineConstantPool wants an explicit alignment. 563 unsigned Align = TD.getPrefTypeAlignment(GV->getType()); 564 if (Align == 0) { 565 // TODO: Figure out if this is correct. 566 Align = TD.getTypeAllocSize(GV->getType()); 567 } 568 569 // Grab index. 570 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); 571 unsigned Id = AFI->createPICLabelUId(); 572 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id, 573 ARMCP::CPValue, PCAdj); 574 unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); 575 576 // Load value. 577 MachineInstrBuilder MIB; 578 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 579 if (isThumb) { 580 unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; 581 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) 582 .addConstantPoolIndex(Idx); 583 if (RelocM == Reloc::PIC_) 584 MIB.addImm(Id); 585 } else { 586 // The extra immediate is for addrmode2. 587 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), 588 DestReg) 589 .addConstantPoolIndex(Idx) 590 .addImm(0); 591 } 592 AddOptionalDefs(MIB); 593 return DestReg; 594 } 595 596 unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { 597 EVT VT = TLI.getValueType(C->getType(), true); 598 599 // Only handle simple types. 600 if (!VT.isSimple()) return 0; 601 602 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 603 return ARMMaterializeFP(CFP, VT); 604 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 605 return ARMMaterializeGV(GV, VT); 606 else if (isa<ConstantInt>(C)) 607 return ARMMaterializeInt(C, VT); 608 609 return 0; 610 } 611 612 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { 613 // Don't handle dynamic allocas. 614 if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; 615 616 MVT VT; 617 if (!isLoadTypeLegal(AI->getType(), VT)) return false; 618 619 DenseMap<const AllocaInst*, int>::iterator SI = 620 FuncInfo.StaticAllocaMap.find(AI); 621 622 // This will get lowered later into the correct offsets and registers 623 // via rewriteXFrameIndex. 624 if (SI != FuncInfo.StaticAllocaMap.end()) { 625 TargetRegisterClass* RC = TLI.getRegClassFor(VT); 626 unsigned ResultReg = createResultReg(RC); 627 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; 628 AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, 629 TII.get(Opc), ResultReg) 630 .addFrameIndex(SI->second) 631 .addImm(0)); 632 return ResultReg; 633 } 634 635 return 0; 636 } 637 638 bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) { 639 EVT evt = TLI.getValueType(Ty, true); 640 641 // Only handle simple types. 642 if (evt == MVT::Other || !evt.isSimple()) return false; 643 VT = evt.getSimpleVT(); 644 645 // Handle all legal types, i.e. a register that will directly hold this 646 // value. 647 return TLI.isTypeLegal(VT); 648 } 649 650 bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) { 651 if (isTypeLegal(Ty, VT)) return true; 652 653 // If this is a type than can be sign or zero-extended to a basic operation 654 // go ahead and accept it now. 655 if (VT == MVT::i8 || VT == MVT::i16) 656 return true; 657 658 return false; 659 } 660 661 // Computes the address to get to an object. 662 bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { 663 // Some boilerplate from the X86 FastISel. 664 const User *U = NULL; 665 unsigned Opcode = Instruction::UserOp1; 666 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 667 // Don't walk into other basic blocks unless the object is an alloca from 668 // another block, otherwise it may not have a virtual register assigned. 669 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 670 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 671 Opcode = I->getOpcode(); 672 U = I; 673 } 674 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 675 Opcode = C->getOpcode(); 676 U = C; 677 } 678 679 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 680 if (Ty->getAddressSpace() > 255) 681 // Fast instruction selection doesn't support the special 682 // address spaces. 683 return false; 684 685 switch (Opcode) { 686 default: 687 break; 688 case Instruction::BitCast: { 689 // Look through bitcasts. 690 return ARMComputeAddress(U->getOperand(0), Addr); 691 } 692 case Instruction::IntToPtr: { 693 // Look past no-op inttoptrs. 694 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 695 return ARMComputeAddress(U->getOperand(0), Addr); 696 break; 697 } 698 case Instruction::PtrToInt: { 699 // Look past no-op ptrtoints. 700 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 701 return ARMComputeAddress(U->getOperand(0), Addr); 702 break; 703 } 704 case Instruction::GetElementPtr: { 705 Address SavedAddr = Addr; 706 int TmpOffset = Addr.Offset; 707 708 // Iterate through the GEP folding the constants into offsets where 709 // we can. 710 gep_type_iterator GTI = gep_type_begin(U); 711 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); 712 i != e; ++i, ++GTI) { 713 const Value *Op = *i; 714 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 715 const StructLayout *SL = TD.getStructLayout(STy); 716 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 717 TmpOffset += SL->getElementOffset(Idx); 718 } else { 719 uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); 720 for (;;) { 721 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 722 // Constant-offset addressing. 723 TmpOffset += CI->getSExtValue() * S; 724 break; 725 } 726 if (isa<AddOperator>(Op) && 727 (!isa<Instruction>(Op) || 728 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] 729 == FuncInfo.MBB) && 730 isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { 731 // An add (in the same block) with a constant operand. Fold the 732 // constant. 733 ConstantInt *CI = 734 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 735 TmpOffset += CI->getSExtValue() * S; 736 // Iterate on the other operand. 737 Op = cast<AddOperator>(Op)->getOperand(0); 738 continue; 739 } 740 // Unsupported 741 goto unsupported_gep; 742 } 743 } 744 } 745 746 // Try to grab the base operand now. 747 Addr.Offset = TmpOffset; 748 if (ARMComputeAddress(U->getOperand(0), Addr)) return true; 749 750 // We failed, restore everything and try the other options. 751 Addr = SavedAddr; 752 753 unsupported_gep: 754 break; 755 } 756 case Instruction::Alloca: { 757 const AllocaInst *AI = cast<AllocaInst>(Obj); 758 DenseMap<const AllocaInst*, int>::iterator SI = 759 FuncInfo.StaticAllocaMap.find(AI); 760 if (SI != FuncInfo.StaticAllocaMap.end()) { 761 Addr.BaseType = Address::FrameIndexBase; 762 Addr.Base.FI = SI->second; 763 return true; 764 } 765 break; 766 } 767 } 768 769 // Materialize the global variable's address into a reg which can 770 // then be used later to load the variable. 771 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { 772 unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType())); 773 if (Tmp == 0) return false; 774 775 Addr.Base.Reg = Tmp; 776 return true; 777 } 778 779 // Try to get this in a register if nothing else has worked. 780 if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); 781 return Addr.Base.Reg != 0; 782 } 783 784 void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { 785 786 assert(VT.isSimple() && "Non-simple types are invalid here!"); 787 788 bool needsLowering = false; 789 switch (VT.getSimpleVT().SimpleTy) { 790 default: 791 assert(false && "Unhandled load/store type!"); 792 case MVT::i1: 793 case MVT::i8: 794 case MVT::i16: 795 case MVT::i32: 796 // Integer loads/stores handle 12-bit offsets. 797 needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); 798 break; 799 case MVT::f32: 800 case MVT::f64: 801 // Floating point operands handle 8-bit offsets. 802 needsLowering = ((Addr.Offset & 0xff) != Addr.Offset); 803 break; 804 } 805 806 // If this is a stack pointer and the offset needs to be simplified then 807 // put the alloca address into a register, set the base type back to 808 // register and continue. This should almost never happen. 809 if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { 810 TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : 811 ARM::GPRRegisterClass; 812 unsigned ResultReg = createResultReg(RC); 813 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; 814 AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, 815 TII.get(Opc), ResultReg) 816 .addFrameIndex(Addr.Base.FI) 817 .addImm(0)); 818 Addr.Base.Reg = ResultReg; 819 Addr.BaseType = Address::RegBase; 820 } 821 822 // Since the offset is too large for the load/store instruction 823 // get the reg+offset into a register. 824 if (needsLowering) { 825 ARMCC::CondCodes Pred = ARMCC::AL; 826 unsigned PredReg = 0; 827 828 TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : 829 ARM::GPRRegisterClass; 830 unsigned BaseReg = createResultReg(RC); 831 832 if (!isThumb) 833 emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 834 BaseReg, Addr.Base.Reg, Addr.Offset, 835 Pred, PredReg, 836 static_cast<const ARMBaseInstrInfo&>(TII)); 837 else { 838 assert(AFI->isThumb2Function()); 839 emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 840 BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg, 841 static_cast<const ARMBaseInstrInfo&>(TII)); 842 } 843 Addr.Offset = 0; 844 Addr.Base.Reg = BaseReg; 845 } 846 } 847 848 void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, 849 const MachineInstrBuilder &MIB) { 850 // addrmode5 output depends on the selection dag addressing dividing the 851 // offset by 4 that it then later multiplies. Do this here as well. 852 if (VT.getSimpleVT().SimpleTy == MVT::f32 || 853 VT.getSimpleVT().SimpleTy == MVT::f64) 854 Addr.Offset /= 4; 855 856 // Frame base works a bit differently. Handle it separately. 857 if (Addr.BaseType == Address::FrameIndexBase) { 858 int FI = Addr.Base.FI; 859 int Offset = Addr.Offset; 860 MachineMemOperand *MMO = 861 FuncInfo.MF->getMachineMemOperand( 862 MachinePointerInfo::getFixedStack(FI, Offset), 863 MachineMemOperand::MOLoad, 864 MFI.getObjectSize(FI), 865 MFI.getObjectAlignment(FI)); 866 // Now add the rest of the operands. 867 MIB.addFrameIndex(FI); 868 869 // ARM halfword load/stores need an additional operand. 870 if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); 871 872 MIB.addImm(Addr.Offset); 873 MIB.addMemOperand(MMO); 874 } else { 875 // Now add the rest of the operands. 876 MIB.addReg(Addr.Base.Reg); 877 878 // ARM halfword load/stores need an additional operand. 879 if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); 880 881 MIB.addImm(Addr.Offset); 882 } 883 AddOptionalDefs(MIB); 884 } 885 886 bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { 887 888 assert(VT.isSimple() && "Non-simple types are invalid here!"); 889 unsigned Opc; 890 TargetRegisterClass *RC; 891 switch (VT.getSimpleVT().SimpleTy) { 892 // This is mostly going to be Neon/vector support. 893 default: return false; 894 case MVT::i16: 895 Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH; 896 RC = ARM::GPRRegisterClass; 897 break; 898 case MVT::i8: 899 Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12; 900 RC = ARM::GPRRegisterClass; 901 break; 902 case MVT::i32: 903 Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12; 904 RC = ARM::GPRRegisterClass; 905 break; 906 case MVT::f32: 907 Opc = ARM::VLDRS; 908 RC = TLI.getRegClassFor(VT); 909 break; 910 case MVT::f64: 911 Opc = ARM::VLDRD; 912 RC = TLI.getRegClassFor(VT); 913 break; 914 } 915 // Simplify this down to something we can handle. 916 ARMSimplifyAddress(Addr, VT); 917 918 // Create the base instruction, then add the operands. 919 ResultReg = createResultReg(RC); 920 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 921 TII.get(Opc), ResultReg); 922 AddLoadStoreOperands(VT, Addr, MIB); 923 return true; 924 } 925 926 bool ARMFastISel::SelectLoad(const Instruction *I) { 927 // Verify we have a legal type before going any further. 928 MVT VT; 929 if (!isLoadTypeLegal(I->getType(), VT)) 930 return false; 931 932 // See if we can handle this address. 933 Address Addr; 934 if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; 935 936 unsigned ResultReg; 937 if (!ARMEmitLoad(VT, ResultReg, Addr)) return false; 938 UpdateValueMap(I, ResultReg); 939 return true; 940 } 941 942 bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { 943 unsigned StrOpc; 944 switch (VT.getSimpleVT().SimpleTy) { 945 // This is mostly going to be Neon/vector support. 946 default: return false; 947 case MVT::i1: { 948 unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass : 949 ARM::GPRRegisterClass); 950 unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; 951 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 952 TII.get(Opc), Res) 953 .addReg(SrcReg).addImm(1)); 954 SrcReg = Res; 955 } // Fallthrough here. 956 case MVT::i8: 957 StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12; 958 break; 959 case MVT::i16: 960 StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH; 961 break; 962 case MVT::i32: 963 StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12; 964 break; 965 case MVT::f32: 966 if (!Subtarget->hasVFP2()) return false; 967 StrOpc = ARM::VSTRS; 968 break; 969 case MVT::f64: 970 if (!Subtarget->hasVFP2()) return false; 971 StrOpc = ARM::VSTRD; 972 break; 973 } 974 // Simplify this down to something we can handle. 975 ARMSimplifyAddress(Addr, VT); 976 977 // Create the base instruction, then add the operands. 978 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 979 TII.get(StrOpc)) 980 .addReg(SrcReg, getKillRegState(true)); 981 AddLoadStoreOperands(VT, Addr, MIB); 982 return true; 983 } 984 985 bool ARMFastISel::SelectStore(const Instruction *I) { 986 Value *Op0 = I->getOperand(0); 987 unsigned SrcReg = 0; 988 989 // Verify we have a legal type before going any further. 990 MVT VT; 991 if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) 992 return false; 993 994 // Get the value to be stored into a register. 995 SrcReg = getRegForValue(Op0); 996 if (SrcReg == 0) return false; 997 998 // See if we can handle this address. 999 Address Addr; 1000 if (!ARMComputeAddress(I->getOperand(1), Addr)) 1001 return false; 1002 1003 if (!ARMEmitStore(VT, SrcReg, Addr)) return false; 1004 return true; 1005 } 1006 1007 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { 1008 switch (Pred) { 1009 // Needs two compares... 1010 case CmpInst::FCMP_ONE: 1011 case CmpInst::FCMP_UEQ: 1012 default: 1013 // AL is our "false" for now. The other two need more compares. 1014 return ARMCC::AL; 1015 case CmpInst::ICMP_EQ: 1016 case CmpInst::FCMP_OEQ: 1017 return ARMCC::EQ; 1018 case CmpInst::ICMP_SGT: 1019 case CmpInst::FCMP_OGT: 1020 return ARMCC::GT; 1021 case CmpInst::ICMP_SGE: 1022 case CmpInst::FCMP_OGE: 1023 return ARMCC::GE; 1024 case CmpInst::ICMP_UGT: 1025 case CmpInst::FCMP_UGT: 1026 return ARMCC::HI; 1027 case CmpInst::FCMP_OLT: 1028 return ARMCC::MI; 1029 case CmpInst::ICMP_ULE: 1030 case CmpInst::FCMP_OLE: 1031 return ARMCC::LS; 1032 case CmpInst::FCMP_ORD: 1033 return ARMCC::VC; 1034 case CmpInst::FCMP_UNO: 1035 return ARMCC::VS; 1036 case CmpInst::FCMP_UGE: 1037 return ARMCC::PL; 1038 case CmpInst::ICMP_SLT: 1039 case CmpInst::FCMP_ULT: 1040 return ARMCC::LT; 1041 case CmpInst::ICMP_SLE: 1042 case CmpInst::FCMP_ULE: 1043 return ARMCC::LE; 1044 case CmpInst::FCMP_UNE: 1045 case CmpInst::ICMP_NE: 1046 return ARMCC::NE; 1047 case CmpInst::ICMP_UGE: 1048 return ARMCC::HS; 1049 case CmpInst::ICMP_ULT: 1050 return ARMCC::LO; 1051 } 1052 } 1053 1054 bool ARMFastISel::SelectBranch(const Instruction *I) { 1055 const BranchInst *BI = cast<BranchInst>(I); 1056 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 1057 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 1058 1059 // Simple branch support. 1060 1061 // If we can, avoid recomputing the compare - redoing it could lead to wonky 1062 // behavior. 1063 // TODO: Factor this out. 1064 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 1065 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { 1066 MVT VT; 1067 const Type *Ty = CI->getOperand(0)->getType(); 1068 if (!isTypeLegal(Ty, VT)) 1069 return false; 1070 1071 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 1072 if (isFloat && !Subtarget->hasVFP2()) 1073 return false; 1074 1075 unsigned CmpOpc; 1076 switch (VT.SimpleTy) { 1077 default: return false; 1078 // TODO: Verify compares. 1079 case MVT::f32: 1080 CmpOpc = ARM::VCMPES; 1081 break; 1082 case MVT::f64: 1083 CmpOpc = ARM::VCMPED; 1084 break; 1085 case MVT::i32: 1086 CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; 1087 break; 1088 } 1089 1090 // Get the compare predicate. 1091 ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); 1092 1093 // We may not handle every CC for now. 1094 if (ARMPred == ARMCC::AL) return false; 1095 1096 unsigned Arg1 = getRegForValue(CI->getOperand(0)); 1097 if (Arg1 == 0) return false; 1098 1099 unsigned Arg2 = getRegForValue(CI->getOperand(1)); 1100 if (Arg2 == 0) return false; 1101 1102 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1103 TII.get(CmpOpc)) 1104 .addReg(Arg1).addReg(Arg2)); 1105 1106 // For floating point we need to move the result to a comparison register 1107 // that we can then use for branches. 1108 if (isFloat) 1109 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1110 TII.get(ARM::FMSTAT))); 1111 1112 unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; 1113 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 1114 .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); 1115 FastEmitBranch(FBB, DL); 1116 FuncInfo.MBB->addSuccessor(TBB); 1117 return true; 1118 } 1119 } 1120 1121 unsigned CmpReg = getRegForValue(BI->getCondition()); 1122 if (CmpReg == 0) return false; 1123 1124 // Re-set the flags just in case. 1125 unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri; 1126 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1127 .addReg(CmpReg).addImm(0)); 1128 1129 unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; 1130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 1131 .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 1132 FastEmitBranch(FBB, DL); 1133 FuncInfo.MBB->addSuccessor(TBB); 1134 return true; 1135 } 1136 1137 bool ARMFastISel::SelectCmp(const Instruction *I) { 1138 const CmpInst *CI = cast<CmpInst>(I); 1139 1140 MVT VT; 1141 const Type *Ty = CI->getOperand(0)->getType(); 1142 if (!isTypeLegal(Ty, VT)) 1143 return false; 1144 1145 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 1146 if (isFloat && !Subtarget->hasVFP2()) 1147 return false; 1148 1149 unsigned CmpOpc; 1150 unsigned CondReg; 1151 switch (VT.SimpleTy) { 1152 default: return false; 1153 // TODO: Verify compares. 1154 case MVT::f32: 1155 CmpOpc = ARM::VCMPES; 1156 CondReg = ARM::FPSCR; 1157 break; 1158 case MVT::f64: 1159 CmpOpc = ARM::VCMPED; 1160 CondReg = ARM::FPSCR; 1161 break; 1162 case MVT::i32: 1163 CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; 1164 CondReg = ARM::CPSR; 1165 break; 1166 } 1167 1168 // Get the compare predicate. 1169 ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); 1170 1171 // We may not handle every CC for now. 1172 if (ARMPred == ARMCC::AL) return false; 1173 1174 unsigned Arg1 = getRegForValue(CI->getOperand(0)); 1175 if (Arg1 == 0) return false; 1176 1177 unsigned Arg2 = getRegForValue(CI->getOperand(1)); 1178 if (Arg2 == 0) return false; 1179 1180 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1181 .addReg(Arg1).addReg(Arg2)); 1182 1183 // For floating point we need to move the result to a comparison register 1184 // that we can then use for branches. 1185 if (isFloat) 1186 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1187 TII.get(ARM::FMSTAT))); 1188 1189 // Now set a register based on the comparison. Explicitly set the predicates 1190 // here. 1191 unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; 1192 TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass 1193 : ARM::GPRRegisterClass; 1194 unsigned DestReg = createResultReg(RC); 1195 Constant *Zero 1196 = ConstantInt::get(Type::getInt32Ty(*Context), 0); 1197 unsigned ZeroReg = TargetMaterializeConstant(Zero); 1198 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) 1199 .addReg(ZeroReg).addImm(1) 1200 .addImm(ARMPred).addReg(CondReg); 1201 1202 UpdateValueMap(I, DestReg); 1203 return true; 1204 } 1205 1206 bool ARMFastISel::SelectFPExt(const Instruction *I) { 1207 // Make sure we have VFP and that we're extending float to double. 1208 if (!Subtarget->hasVFP2()) return false; 1209 1210 Value *V = I->getOperand(0); 1211 if (!I->getType()->isDoubleTy() || 1212 !V->getType()->isFloatTy()) return false; 1213 1214 unsigned Op = getRegForValue(V); 1215 if (Op == 0) return false; 1216 1217 unsigned Result = createResultReg(ARM::DPRRegisterClass); 1218 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1219 TII.get(ARM::VCVTDS), Result) 1220 .addReg(Op)); 1221 UpdateValueMap(I, Result); 1222 return true; 1223 } 1224 1225 bool ARMFastISel::SelectFPTrunc(const Instruction *I) { 1226 // Make sure we have VFP and that we're truncating double to float. 1227 if (!Subtarget->hasVFP2()) return false; 1228 1229 Value *V = I->getOperand(0); 1230 if (!(I->getType()->isFloatTy() && 1231 V->getType()->isDoubleTy())) return false; 1232 1233 unsigned Op = getRegForValue(V); 1234 if (Op == 0) return false; 1235 1236 unsigned Result = createResultReg(ARM::SPRRegisterClass); 1237 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1238 TII.get(ARM::VCVTSD), Result) 1239 .addReg(Op)); 1240 UpdateValueMap(I, Result); 1241 return true; 1242 } 1243 1244 bool ARMFastISel::SelectSIToFP(const Instruction *I) { 1245 // Make sure we have VFP. 1246 if (!Subtarget->hasVFP2()) return false; 1247 1248 MVT DstVT; 1249 const Type *Ty = I->getType(); 1250 if (!isTypeLegal(Ty, DstVT)) 1251 return false; 1252 1253 unsigned Op = getRegForValue(I->getOperand(0)); 1254 if (Op == 0) return false; 1255 1256 // The conversion routine works on fp-reg to fp-reg and the operand above 1257 // was an integer, move it to the fp registers if possible. 1258 unsigned FP = ARMMoveToFPReg(MVT::f32, Op); 1259 if (FP == 0) return false; 1260 1261 unsigned Opc; 1262 if (Ty->isFloatTy()) Opc = ARM::VSITOS; 1263 else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; 1264 else return 0; 1265 1266 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); 1267 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1268 ResultReg) 1269 .addReg(FP)); 1270 UpdateValueMap(I, ResultReg); 1271 return true; 1272 } 1273 1274 bool ARMFastISel::SelectFPToSI(const Instruction *I) { 1275 // Make sure we have VFP. 1276 if (!Subtarget->hasVFP2()) return false; 1277 1278 MVT DstVT; 1279 const Type *RetTy = I->getType(); 1280 if (!isTypeLegal(RetTy, DstVT)) 1281 return false; 1282 1283 unsigned Op = getRegForValue(I->getOperand(0)); 1284 if (Op == 0) return false; 1285 1286 unsigned Opc; 1287 const Type *OpTy = I->getOperand(0)->getType(); 1288 if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; 1289 else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; 1290 else return 0; 1291 1292 // f64->s32 or f32->s32 both need an intermediate f32 reg. 1293 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); 1294 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1295 ResultReg) 1296 .addReg(Op)); 1297 1298 // This result needs to be in an integer register, but the conversion only 1299 // takes place in fp-regs. 1300 unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); 1301 if (IntReg == 0) return false; 1302 1303 UpdateValueMap(I, IntReg); 1304 return true; 1305 } 1306 1307 bool ARMFastISel::SelectSelect(const Instruction *I) { 1308 MVT VT; 1309 if (!isTypeLegal(I->getType(), VT)) 1310 return false; 1311 1312 // Things need to be register sized for register moves. 1313 if (VT != MVT::i32) return false; 1314 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 1315 1316 unsigned CondReg = getRegForValue(I->getOperand(0)); 1317 if (CondReg == 0) return false; 1318 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1319 if (Op1Reg == 0) return false; 1320 unsigned Op2Reg = getRegForValue(I->getOperand(2)); 1321 if (Op2Reg == 0) return false; 1322 1323 unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; 1324 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1325 .addReg(CondReg).addImm(1)); 1326 unsigned ResultReg = createResultReg(RC); 1327 unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr; 1328 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1329 .addReg(Op1Reg).addReg(Op2Reg) 1330 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 1331 UpdateValueMap(I, ResultReg); 1332 return true; 1333 } 1334 1335 bool ARMFastISel::SelectSDiv(const Instruction *I) { 1336 MVT VT; 1337 const Type *Ty = I->getType(); 1338 if (!isTypeLegal(Ty, VT)) 1339 return false; 1340 1341 // If we have integer div support we should have selected this automagically. 1342 // In case we have a real miss go ahead and return false and we'll pick 1343 // it up later. 1344 if (Subtarget->hasDivide()) return false; 1345 1346 // Otherwise emit a libcall. 1347 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1348 if (VT == MVT::i8) 1349 LC = RTLIB::SDIV_I8; 1350 else if (VT == MVT::i16) 1351 LC = RTLIB::SDIV_I16; 1352 else if (VT == MVT::i32) 1353 LC = RTLIB::SDIV_I32; 1354 else if (VT == MVT::i64) 1355 LC = RTLIB::SDIV_I64; 1356 else if (VT == MVT::i128) 1357 LC = RTLIB::SDIV_I128; 1358 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); 1359 1360 return ARMEmitLibcall(I, LC); 1361 } 1362 1363 bool ARMFastISel::SelectSRem(const Instruction *I) { 1364 MVT VT; 1365 const Type *Ty = I->getType(); 1366 if (!isTypeLegal(Ty, VT)) 1367 return false; 1368 1369 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1370 if (VT == MVT::i8) 1371 LC = RTLIB::SREM_I8; 1372 else if (VT == MVT::i16) 1373 LC = RTLIB::SREM_I16; 1374 else if (VT == MVT::i32) 1375 LC = RTLIB::SREM_I32; 1376 else if (VT == MVT::i64) 1377 LC = RTLIB::SREM_I64; 1378 else if (VT == MVT::i128) 1379 LC = RTLIB::SREM_I128; 1380 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); 1381 1382 return ARMEmitLibcall(I, LC); 1383 } 1384 1385 bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { 1386 EVT VT = TLI.getValueType(I->getType(), true); 1387 1388 // We can get here in the case when we want to use NEON for our fp 1389 // operations, but can't figure out how to. Just use the vfp instructions 1390 // if we have them. 1391 // FIXME: It'd be nice to use NEON instructions. 1392 const Type *Ty = I->getType(); 1393 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 1394 if (isFloat && !Subtarget->hasVFP2()) 1395 return false; 1396 1397 unsigned Op1 = getRegForValue(I->getOperand(0)); 1398 if (Op1 == 0) return false; 1399 1400 unsigned Op2 = getRegForValue(I->getOperand(1)); 1401 if (Op2 == 0) return false; 1402 1403 unsigned Opc; 1404 bool is64bit = VT == MVT::f64 || VT == MVT::i64; 1405 switch (ISDOpcode) { 1406 default: return false; 1407 case ISD::FADD: 1408 Opc = is64bit ? ARM::VADDD : ARM::VADDS; 1409 break; 1410 case ISD::FSUB: 1411 Opc = is64bit ? ARM::VSUBD : ARM::VSUBS; 1412 break; 1413 case ISD::FMUL: 1414 Opc = is64bit ? ARM::VMULD : ARM::VMULS; 1415 break; 1416 } 1417 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1418 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1419 TII.get(Opc), ResultReg) 1420 .addReg(Op1).addReg(Op2)); 1421 UpdateValueMap(I, ResultReg); 1422 return true; 1423 } 1424 1425 // Call Handling Code 1426 1427 bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, 1428 EVT SrcVT, unsigned &ResultReg) { 1429 unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, 1430 Src, /*TODO: Kill=*/false); 1431 1432 if (RR != 0) { 1433 ResultReg = RR; 1434 return true; 1435 } else 1436 return false; 1437 } 1438 1439 // This is largely taken directly from CCAssignFnForNode - we don't support 1440 // varargs in FastISel so that part has been removed. 1441 // TODO: We may not support all of this. 1442 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { 1443 switch (CC) { 1444 default: 1445 llvm_unreachable("Unsupported calling convention"); 1446 case CallingConv::Fast: 1447 // Ignore fastcc. Silence compiler warnings. 1448 (void)RetFastCC_ARM_APCS; 1449 (void)FastCC_ARM_APCS; 1450 // Fallthrough 1451 case CallingConv::C: 1452 // Use target triple & subtarget features to do actual dispatch. 1453 if (Subtarget->isAAPCS_ABI()) { 1454 if (Subtarget->hasVFP2() && 1455 FloatABIType == FloatABI::Hard) 1456 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1457 else 1458 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1459 } else 1460 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); 1461 case CallingConv::ARM_AAPCS_VFP: 1462 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1463 case CallingConv::ARM_AAPCS: 1464 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1465 case CallingConv::ARM_APCS: 1466 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); 1467 } 1468 } 1469 1470 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, 1471 SmallVectorImpl<unsigned> &ArgRegs, 1472 SmallVectorImpl<MVT> &ArgVTs, 1473 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 1474 SmallVectorImpl<unsigned> &RegArgs, 1475 CallingConv::ID CC, 1476 unsigned &NumBytes) { 1477 SmallVector<CCValAssign, 16> ArgLocs; 1478 CCState CCInfo(CC, false, TM, ArgLocs, *Context); 1479 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); 1480 1481 // Get a count of how many bytes are to be pushed on the stack. 1482 NumBytes = CCInfo.getNextStackOffset(); 1483 1484 // Issue CALLSEQ_START 1485 unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); 1486 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1487 TII.get(AdjStackDown)) 1488 .addImm(NumBytes)); 1489 1490 // Process the args. 1491 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1492 CCValAssign &VA = ArgLocs[i]; 1493 unsigned Arg = ArgRegs[VA.getValNo()]; 1494 MVT ArgVT = ArgVTs[VA.getValNo()]; 1495 1496 // We don't handle NEON/vector parameters yet. 1497 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) 1498 return false; 1499 1500 // Handle arg promotion, etc. 1501 switch (VA.getLocInfo()) { 1502 case CCValAssign::Full: break; 1503 case CCValAssign::SExt: { 1504 bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1505 Arg, ArgVT, Arg); 1506 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; 1507 Emitted = true; 1508 ArgVT = VA.getLocVT(); 1509 break; 1510 } 1511 case CCValAssign::ZExt: { 1512 bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1513 Arg, ArgVT, Arg); 1514 assert(Emitted && "Failed to emit a zext!"); (void)Emitted; 1515 Emitted = true; 1516 ArgVT = VA.getLocVT(); 1517 break; 1518 } 1519 case CCValAssign::AExt: { 1520 bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), 1521 Arg, ArgVT, Arg); 1522 if (!Emitted) 1523 Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1524 Arg, ArgVT, Arg); 1525 if (!Emitted) 1526 Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1527 Arg, ArgVT, Arg); 1528 1529 assert(Emitted && "Failed to emit a aext!"); (void)Emitted; 1530 ArgVT = VA.getLocVT(); 1531 break; 1532 } 1533 case CCValAssign::BCvt: { 1534 unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, 1535 /*TODO: Kill=*/false); 1536 assert(BC != 0 && "Failed to emit a bitcast!"); 1537 Arg = BC; 1538 ArgVT = VA.getLocVT(); 1539 break; 1540 } 1541 default: llvm_unreachable("Unknown arg promotion!"); 1542 } 1543 1544 // Now copy/store arg to correct locations. 1545 if (VA.isRegLoc() && !VA.needsCustom()) { 1546 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1547 VA.getLocReg()) 1548 .addReg(Arg); 1549 RegArgs.push_back(VA.getLocReg()); 1550 } else if (VA.needsCustom()) { 1551 // TODO: We need custom lowering for vector (v2f64) args. 1552 if (VA.getLocVT() != MVT::f64) return false; 1553 1554 CCValAssign &NextVA = ArgLocs[++i]; 1555 1556 // TODO: Only handle register args for now. 1557 if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false; 1558 1559 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1560 TII.get(ARM::VMOVRRD), VA.getLocReg()) 1561 .addReg(NextVA.getLocReg(), RegState::Define) 1562 .addReg(Arg)); 1563 RegArgs.push_back(VA.getLocReg()); 1564 RegArgs.push_back(NextVA.getLocReg()); 1565 } else { 1566 assert(VA.isMemLoc()); 1567 // Need to store on the stack. 1568 Address Addr; 1569 Addr.BaseType = Address::RegBase; 1570 Addr.Base.Reg = ARM::SP; 1571 Addr.Offset = VA.getLocMemOffset(); 1572 1573 if (!ARMEmitStore(ArgVT, Arg, Addr)) return false; 1574 } 1575 } 1576 return true; 1577 } 1578 1579 bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 1580 const Instruction *I, CallingConv::ID CC, 1581 unsigned &NumBytes) { 1582 // Issue CALLSEQ_END 1583 unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); 1584 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1585 TII.get(AdjStackUp)) 1586 .addImm(NumBytes).addImm(0)); 1587 1588 // Now the return value. 1589 if (RetVT != MVT::isVoid) { 1590 SmallVector<CCValAssign, 16> RVLocs; 1591 CCState CCInfo(CC, false, TM, RVLocs, *Context); 1592 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); 1593 1594 // Copy all of the result registers out of their specified physreg. 1595 if (RVLocs.size() == 2 && RetVT == MVT::f64) { 1596 // For this move we copy into two registers and then move into the 1597 // double fp reg we want. 1598 EVT DestVT = RVLocs[0].getValVT(); 1599 TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); 1600 unsigned ResultReg = createResultReg(DstRC); 1601 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1602 TII.get(ARM::VMOVDRR), ResultReg) 1603 .addReg(RVLocs[0].getLocReg()) 1604 .addReg(RVLocs[1].getLocReg())); 1605 1606 UsedRegs.push_back(RVLocs[0].getLocReg()); 1607 UsedRegs.push_back(RVLocs[1].getLocReg()); 1608 1609 // Finally update the result. 1610 UpdateValueMap(I, ResultReg); 1611 } else { 1612 assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); 1613 EVT CopyVT = RVLocs[0].getValVT(); 1614 TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); 1615 1616 unsigned ResultReg = createResultReg(DstRC); 1617 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1618 ResultReg).addReg(RVLocs[0].getLocReg()); 1619 UsedRegs.push_back(RVLocs[0].getLocReg()); 1620 1621 // Finally update the result. 1622 UpdateValueMap(I, ResultReg); 1623 } 1624 } 1625 1626 return true; 1627 } 1628 1629 bool ARMFastISel::SelectRet(const Instruction *I) { 1630 const ReturnInst *Ret = cast<ReturnInst>(I); 1631 const Function &F = *I->getParent()->getParent(); 1632 1633 if (!FuncInfo.CanLowerReturn) 1634 return false; 1635 1636 if (F.isVarArg()) 1637 return false; 1638 1639 CallingConv::ID CC = F.getCallingConv(); 1640 if (Ret->getNumOperands() > 0) { 1641 SmallVector<ISD::OutputArg, 4> Outs; 1642 GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), 1643 Outs, TLI); 1644 1645 // Analyze operands of the call, assigning locations to each operand. 1646 SmallVector<CCValAssign, 16> ValLocs; 1647 CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); 1648 CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); 1649 1650 const Value *RV = Ret->getOperand(0); 1651 unsigned Reg = getRegForValue(RV); 1652 if (Reg == 0) 1653 return false; 1654 1655 // Only handle a single return value for now. 1656 if (ValLocs.size() != 1) 1657 return false; 1658 1659 CCValAssign &VA = ValLocs[0]; 1660 1661 // Don't bother handling odd stuff for now. 1662 if (VA.getLocInfo() != CCValAssign::Full) 1663 return false; 1664 // Only handle register returns for now. 1665 if (!VA.isRegLoc()) 1666 return false; 1667 // TODO: For now, don't try to handle cases where getLocInfo() 1668 // says Full but the types don't match. 1669 if (TLI.getValueType(RV->getType()) != VA.getValVT()) 1670 return false; 1671 1672 // Make the copy. 1673 unsigned SrcReg = Reg + VA.getValNo(); 1674 unsigned DstReg = VA.getLocReg(); 1675 const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); 1676 // Avoid a cross-class copy. This is very unlikely. 1677 if (!SrcRC->contains(DstReg)) 1678 return false; 1679 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1680 DstReg).addReg(SrcReg); 1681 1682 // Mark the register as live out of the function. 1683 MRI.addLiveOut(VA.getLocReg()); 1684 } 1685 1686 unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET; 1687 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1688 TII.get(RetOpc))); 1689 return true; 1690 } 1691 1692 unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { 1693 1694 // Darwin needs the r9 versions of the opcodes. 1695 bool isDarwin = Subtarget->isTargetDarwin(); 1696 if (isThumb) { 1697 return isDarwin ? ARM::tBLr9 : ARM::tBL; 1698 } else { 1699 return isDarwin ? ARM::BLr9 : ARM::BL; 1700 } 1701 } 1702 1703 // A quick function that will emit a call for a named libcall in F with the 1704 // vector of passed arguments for the Instruction in I. We can assume that we 1705 // can emit a call for any libcall we can produce. This is an abridged version 1706 // of the full call infrastructure since we won't need to worry about things 1707 // like computed function pointers or strange arguments at call sites. 1708 // TODO: Try to unify this and the normal call bits for ARM, then try to unify 1709 // with X86. 1710 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { 1711 CallingConv::ID CC = TLI.getLibcallCallingConv(Call); 1712 1713 // Handle *simple* calls for now. 1714 const Type *RetTy = I->getType(); 1715 MVT RetVT; 1716 if (RetTy->isVoidTy()) 1717 RetVT = MVT::isVoid; 1718 else if (!isTypeLegal(RetTy, RetVT)) 1719 return false; 1720 1721 // For now we're using BLX etc on the assumption that we have v5t ops. 1722 if (!Subtarget->hasV5TOps()) return false; 1723 1724 // TODO: For now if we have long calls specified we don't handle the call. 1725 if (EnableARMLongCalls) return false; 1726 1727 // Set up the argument vectors. 1728 SmallVector<Value*, 8> Args; 1729 SmallVector<unsigned, 8> ArgRegs; 1730 SmallVector<MVT, 8> ArgVTs; 1731 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1732 Args.reserve(I->getNumOperands()); 1733 ArgRegs.reserve(I->getNumOperands()); 1734 ArgVTs.reserve(I->getNumOperands()); 1735 ArgFlags.reserve(I->getNumOperands()); 1736 for (unsigned i = 0; i < I->getNumOperands(); ++i) { 1737 Value *Op = I->getOperand(i); 1738 unsigned Arg = getRegForValue(Op); 1739 if (Arg == 0) return false; 1740 1741 const Type *ArgTy = Op->getType(); 1742 MVT ArgVT; 1743 if (!isTypeLegal(ArgTy, ArgVT)) return false; 1744 1745 ISD::ArgFlagsTy Flags; 1746 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1747 Flags.setOrigAlign(OriginalAlignment); 1748 1749 Args.push_back(Op); 1750 ArgRegs.push_back(Arg); 1751 ArgVTs.push_back(ArgVT); 1752 ArgFlags.push_back(Flags); 1753 } 1754 1755 // Handle the arguments now that we've gotten them. 1756 SmallVector<unsigned, 4> RegArgs; 1757 unsigned NumBytes; 1758 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) 1759 return false; 1760 1761 // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. 1762 // TODO: Turn this into the table of arm call ops. 1763 MachineInstrBuilder MIB; 1764 unsigned CallOpc = ARMSelectCallOp(NULL); 1765 if(isThumb) 1766 // Explicitly adding the predicate here. 1767 MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1768 TII.get(CallOpc))) 1769 .addExternalSymbol(TLI.getLibcallName(Call)); 1770 else 1771 // Explicitly adding the predicate here. 1772 MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1773 TII.get(CallOpc)) 1774 .addExternalSymbol(TLI.getLibcallName(Call))); 1775 1776 // Add implicit physical register uses to the call. 1777 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 1778 MIB.addReg(RegArgs[i]); 1779 1780 // Finish off the call including any return values. 1781 SmallVector<unsigned, 4> UsedRegs; 1782 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; 1783 1784 // Set all unused physreg defs as dead. 1785 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 1786 1787 return true; 1788 } 1789 1790 bool ARMFastISel::SelectCall(const Instruction *I) { 1791 const CallInst *CI = cast<CallInst>(I); 1792 const Value *Callee = CI->getCalledValue(); 1793 1794 // Can't handle inline asm or worry about intrinsics yet. 1795 if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false; 1796 1797 // Only handle global variable Callees that are direct calls. 1798 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); 1799 if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel())) 1800 return false; 1801 1802 // Check the calling convention. 1803 ImmutableCallSite CS(CI); 1804 CallingConv::ID CC = CS.getCallingConv(); 1805 1806 // TODO: Avoid some calling conventions? 1807 1808 // Let SDISel handle vararg functions. 1809 const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 1810 const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 1811 if (FTy->isVarArg()) 1812 return false; 1813 1814 // Handle *simple* calls for now. 1815 const Type *RetTy = I->getType(); 1816 MVT RetVT; 1817 if (RetTy->isVoidTy()) 1818 RetVT = MVT::isVoid; 1819 else if (!isTypeLegal(RetTy, RetVT)) 1820 return false; 1821 1822 // For now we're using BLX etc on the assumption that we have v5t ops. 1823 // TODO: Maybe? 1824 if (!Subtarget->hasV5TOps()) return false; 1825 1826 // TODO: For now if we have long calls specified we don't handle the call. 1827 if (EnableARMLongCalls) return false; 1828 1829 // Set up the argument vectors. 1830 SmallVector<Value*, 8> Args; 1831 SmallVector<unsigned, 8> ArgRegs; 1832 SmallVector<MVT, 8> ArgVTs; 1833 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1834 Args.reserve(CS.arg_size()); 1835 ArgRegs.reserve(CS.arg_size()); 1836 ArgVTs.reserve(CS.arg_size()); 1837 ArgFlags.reserve(CS.arg_size()); 1838 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 1839 i != e; ++i) { 1840 unsigned Arg = getRegForValue(*i); 1841 1842 if (Arg == 0) 1843 return false; 1844 ISD::ArgFlagsTy Flags; 1845 unsigned AttrInd = i - CS.arg_begin() + 1; 1846 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 1847 Flags.setSExt(); 1848 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 1849 Flags.setZExt(); 1850 1851 // FIXME: Only handle *easy* calls for now. 1852 if (CS.paramHasAttr(AttrInd, Attribute::InReg) || 1853 CS.paramHasAttr(AttrInd, Attribute::StructRet) || 1854 CS.paramHasAttr(AttrInd, Attribute::Nest) || 1855 CS.paramHasAttr(AttrInd, Attribute::ByVal)) 1856 return false; 1857 1858 const Type *ArgTy = (*i)->getType(); 1859 MVT ArgVT; 1860 if (!isTypeLegal(ArgTy, ArgVT)) 1861 return false; 1862 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1863 Flags.setOrigAlign(OriginalAlignment); 1864 1865 Args.push_back(*i); 1866 ArgRegs.push_back(Arg); 1867 ArgVTs.push_back(ArgVT); 1868 ArgFlags.push_back(Flags); 1869 } 1870 1871 // Handle the arguments now that we've gotten them. 1872 SmallVector<unsigned, 4> RegArgs; 1873 unsigned NumBytes; 1874 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) 1875 return false; 1876 1877 // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. 1878 // TODO: Turn this into the table of arm call ops. 1879 MachineInstrBuilder MIB; 1880 unsigned CallOpc = ARMSelectCallOp(GV); 1881 // Explicitly adding the predicate here. 1882 if(isThumb) 1883 // Explicitly adding the predicate here. 1884 MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1885 TII.get(CallOpc))) 1886 .addGlobalAddress(GV, 0, 0); 1887 else 1888 // Explicitly adding the predicate here. 1889 MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1890 TII.get(CallOpc)) 1891 .addGlobalAddress(GV, 0, 0)); 1892 1893 // Add implicit physical register uses to the call. 1894 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 1895 MIB.addReg(RegArgs[i]); 1896 1897 // Finish off the call including any return values. 1898 SmallVector<unsigned, 4> UsedRegs; 1899 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; 1900 1901 // Set all unused physreg defs as dead. 1902 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 1903 1904 return true; 1905 1906 } 1907 1908 // TODO: SoftFP support. 1909 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { 1910 1911 switch (I->getOpcode()) { 1912 case Instruction::Load: 1913 return SelectLoad(I); 1914 case Instruction::Store: 1915 return SelectStore(I); 1916 case Instruction::Br: 1917 return SelectBranch(I); 1918 case Instruction::ICmp: 1919 case Instruction::FCmp: 1920 return SelectCmp(I); 1921 case Instruction::FPExt: 1922 return SelectFPExt(I); 1923 case Instruction::FPTrunc: 1924 return SelectFPTrunc(I); 1925 case Instruction::SIToFP: 1926 return SelectSIToFP(I); 1927 case Instruction::FPToSI: 1928 return SelectFPToSI(I); 1929 case Instruction::FAdd: 1930 return SelectBinaryOp(I, ISD::FADD); 1931 case Instruction::FSub: 1932 return SelectBinaryOp(I, ISD::FSUB); 1933 case Instruction::FMul: 1934 return SelectBinaryOp(I, ISD::FMUL); 1935 case Instruction::SDiv: 1936 return SelectSDiv(I); 1937 case Instruction::SRem: 1938 return SelectSRem(I); 1939 case Instruction::Call: 1940 return SelectCall(I); 1941 case Instruction::Select: 1942 return SelectSelect(I); 1943 case Instruction::Ret: 1944 return SelectRet(I); 1945 default: break; 1946 } 1947 return false; 1948 } 1949 1950 namespace llvm { 1951 llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { 1952 // Completely untested on non-darwin. 1953 const TargetMachine &TM = funcInfo.MF->getTarget(); 1954 1955 // Darwin and thumb1 only for now. 1956 const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); 1957 if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() && 1958 !DisableARMFastISel) 1959 return new ARMFastISel(funcInfo); 1960 return 0; 1961 } 1962 } 1963