//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <new>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,        MulOpc,        AddSubOpc,   NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,     ARM::VMULS,    ARM::VADDS,  false,  false },
  { ARM::VMLSS,     ARM::VMULS,    ARM::VSUBS,  false,  false },
  { ARM::VMLAD,     ARM::VMULD,    ARM::VADDD,  false,  false },
  { ARM::VMLSD,     ARM::VMULD,    ARM::VSUBD,  false,  false },
  { ARM::VNMLAS,    ARM::VNMULS,   ARM::VSUBS,  true,   false },
  { ARM::VNMLSS,    ARM::VMULS,    ARM::VSUBS,  true,   false },
  { ARM::VNMLAD,    ARM::VNMULD,   ARM::VSUBD,  true,   false },
  { ARM::VNMLSD,    ARM::VMULD,    ARM::VSUBD,  true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,    ARM::VMULfd,   ARM::VADDfd, false,  false },
  { ARM::VMLSfd,    ARM::VMULfd,   ARM::VSUBfd, false,  false },
  { ARM::VMLAfq,    ARM::VMULfq,   ARM::VADDfq, false,  false },
  { ARM::VMLSfq,    ARM::VMULfq,   ARM::VSUBfq, false,  false },
  { ARM::VMLAslfd,  ARM::VMULslfd, ARM::VADDfd, false,  true  },
  { ARM::VMLSslfd,  ARM::VMULslfd, ARM::VSUBfd, false,  true  },
  { ARM::VMLAslfq,  ARM::VMULslfq, ARM::VADDfq, false,  true  },
  { ARM::VMLSslfq,  ARM::VMULslfq, ARM::VSUBfq, false,  true  },
};

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                               const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II =
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
    MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return nullptr;

  MachineFunction &MF = *MI.getParent()->getParent();
  uint64_t TSFlags = MI.getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
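  // Roughly (a sketch of the intent, register names are illustrative only):
  //   ldr r0, [r1], #4     ; post-indexed load
  // becomes
  //   ldr r0, [r1]         ; un-indexed load
  //   add r1, r1, #4       ; explicit base-register update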
  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
  if (MemOpc == 0)
    return nullptr;

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI.getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI.mayStore();
  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(2);
  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .addReg(0)
                     .addImm(SOOpc)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI =
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(WBReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(WBReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI =
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(BaseReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(BaseReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, *NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, *NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MachineBasicBlock::iterator MBBI = MI.getIterator();
  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
    // Flag to be raised on unanalyzable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(*I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    // returns.
    if (!isPredicated(*I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
         I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
      Cond.clear();
      FBB = nullptr;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.end()) {
          MachineInstr &InstToDelete = *DI;
          ++DI;
          InstToDelete.eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}

unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  assert(!BytesAdded && "code size not handled");
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  // For conditional branches, we use addOperand to preserve CPSR flags.

  if (!FBB) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc))
          .addMBB(TBB)
          .addImm(Cond[0].getImm())
          .add(Cond[1]);
    return 1;
  }

  // Two-way conditional branch.
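  // (Emits "Bcc TBB" followed by "B FBB", or the Thumb equivalents, so two
  // inserted instructions are reported to the caller.)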
  BuildMI(&MBB, DL, get(BccOpc))
      .addMBB(TBB)
      .addImm(Cond[0].getImm())
      .add(Cond[1]);
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
  if (MI.isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI.findFirstPredOperandIdx();
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::PredicateInstruction(
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
  unsigned Opc = MI.getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Pred[0].getImm())
        .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI.findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                         ArrayRef<MachineOperand> Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(
    MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

static bool isCPSRDefined(const MachineInstr *MI) {
  for (const auto &MO : MI->operands())
    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
      return true;
  return false;
}

static bool isEligibleForITBlock(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return true;
  case ARM::tADC:   // ADC (register) T1
  case ARM::tADDi3: // ADD (immediate) T1
  case ARM::tADDi8: // ADD (immediate) T2
  case ARM::tADDrr: // ADD (register) T1
  case ARM::tAND:   // AND (register) T1
  case ARM::tASRri: // ASR (immediate) T1
  case ARM::tASRrr: // ASR (register) T1
  case ARM::tBIC:   // BIC (register) T1
  case ARM::tEOR:   // EOR (register) T1
  case ARM::tLSLri: // LSL (immediate) T1
  case ARM::tLSLrr: // LSL (register) T1
  case ARM::tLSRri: // LSR (immediate) T1
  case ARM::tLSRrr: // LSR (register) T1
  case ARM::tMUL:   // MUL T1
  case ARM::tMVN:   // MVN (register) T1
  case ARM::tORR:   // ORR (register) T1
  case ARM::tROR:   // ROR (register) T1
  case ARM::tRSB:   // RSB (immediate) T1
  case ARM::tSBC:   // SBC (register) T1
  case ARM::tSUBi3: // SUB (immediate) T1
  case ARM::tSUBi8: // SUB (immediate) T2
  case ARM::tSUBrr: // SUB (register) T1
    return !isCPSRDefined(MI);
  }
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
  if (!MI.isPredicable())
    return false;

  if (MI.isBundle())
    return false;

  if (!isEligibleForITBlock(&MI))
    return false;

  ARMFunctionInfo *AFI =
      MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(&MI);
  } else { // non-Thumb
    if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
      return false;
  }

  return true;
}

namespace llvm {

template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}

} // end namespace llvm

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI.getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    // pseudo-instruction sizes are zero.
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
  case ARM::JUMPTABLE_INSTS:
  case ARM::JUMPTABLE_ADDRS:
  case ARM::JUMPTABLE_TBB:
  case ARM::JUMPTABLE_TBH:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI.getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::tInt_WIN_eh_sjlj_longjmp:
    return 12;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::SPACE:
    return MI.getOperand(1).getImm();
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
                     : ARM::MRS;

  MachineInstrBuilder MIB =
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);

  // There is only 1 A/R class MRS instruction, and it always refers to
  // APSR. However, there are lots of other possibilities on M-class cores.
  if (Subtarget.isMClass())
    MIB.addImm(0x800);

  MIB.add(predOps(ARMCC::AL))
     .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}

void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
                     : ARM::MSR;

  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  if (Subtarget.isMClass())
    MIB.addImm(0x800);
  else
    MIB.addImm(8);

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
     .add(predOps(ARMCC::AL))
     .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
                                   unsigned SrcReg, bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    MIB.add(predOps(ARMCC::AL));
    return;
  }

  // Handle register classes that require multiple instructions.
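  // From here on, the copy is lowered into one move per sub-register. For
  // instance (a sketch): a QQPR copy becomes two VORRq instructions, one for
  // each of the qsub_0/qsub_1 sub-registers, walking the tuple according to
  // the BeginIdx/SubRegs/Spacing values chosen below.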
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
             Subtarget.isFPOnlySP()) {
    Opc = ARM::VMOVS;
    BeginIdx = ARM::ssub_0;
    SubRegs = 2;
  } else if (SrcReg == ARM::CPSR) {
    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::CPSR) {
    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
    return;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = Mov.add(predOps(ARMCC::AL));
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = Mov.add(condCodeOp());
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to STM instruction, which has existed since the dawn of
        // time.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VST1q64))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
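      // (With a 16-byte aligned slot the spill can use the aligned VST1
      // pseudo below; otherwise it falls back to VSTMDIA.)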
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
  }
}

unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
                  .addFrameIndex(FI)
                  .addMemOperand(MMO)
                  .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
/// depending on whether the result is used.
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
  bool isThumb1 = Subtarget.isThumb1Only();
  bool isThumb2 = Subtarget.isThumb2();
  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();

  DebugLoc dl = MI->getDebugLoc();
  MachineBasicBlock *BB = MI->getParent();

  MachineInstrBuilder LDM, STM;
  if (isThumb1 || !MI->getOperand(1).isDead()) {
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
                                                 : isThumb1 ? ARM::tLDMIA_UPD
                                                            : ARM::LDMIA_UPD))
              .add(MI->getOperand(1));
  } else {
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
  }

  if (isThumb1 || !MI->getOperand(0).isDead()) {
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
                                                 : isThumb1 ? ARM::tSTMIA_UPD
                                                            : ARM::STMIA_UPD))
              .add(MI->getOperand(0));
  } else {
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
  }

  LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL));
  STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL));

  // Sort the scratch registers into ascending order.
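  // (LDM/STM register lists are encoded as a bitmask and must name the
  // registers in ascending order, so the scratch registers are appended in
  // ascending encoding order.)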
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<unsigned, 6> ScratchRegs;
  for (unsigned I = 5; I < MI->getNumOperands(); ++I)
    ScratchRegs.push_back(MI->getOperand(I).getReg());
  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
            [&TRI](const unsigned &Reg1,
                   const unsigned &Reg2) -> bool {
              return TRI.getEncodingValue(Reg1) <
                     TRI.getEncodingValue(Reg2);
            });

  for (const auto &Reg : ScratchRegs) {
    LDM.addReg(Reg, RegState::Define);
    STM.addReg(Reg, RegState::Kill);
  }

  BB->erase(MI);
}

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
           "LOAD_STACK_GUARD currently supported only for MachO.");
    expandLoadStackGuard(MI);
    MI.getParent()->erase(MI);
    return true;
  }

  if (MI.getOpcode() == ARM::MEMCPY) {
    expandMEMCPY(MI);
    return true;
  }

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
    return false;

  // Look for a copy between even S-registers. That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  unsigned DstRegS = MI.getOperand(0).getReg();
  unsigned SrcRegS = MI.getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI.getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  DEBUG(dbgs() << "widening: " << MI);
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);

  // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
  if (ImpDefIdx != -1)
    MI.RemoveOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI.setDesc(get(ARM::VMOVD));
  MI.getOperand(0).setReg(DstRegD);
  MI.getOperand(1).setReg(SrcRegD);
  MIB.add(predOps(ARMCC::AL));

  // We are now reading SrcRegD instead of SrcRegS. This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI.getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
  if (MI.getOperand(1).isKill()) {
    MI.getOperand(1).setIsKill(false);
    MI.addRegisterKilled(SrcRegS, TRI, true);
  }

  DEBUG(dbgs() << "replaced by: " << MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::Create(
        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::
      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
             ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::
      Create(MF.getFunction()->getContext(),
             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     unsigned DestReg, unsigned SubIdx,
                                     const MachineInstr &Orig,
                                     const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig.getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig.getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB =
        BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
            .addConstantPoolIndex(CPI)
            .addImm(PCLabelId);
    MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
    break;
  }
  }
}

MachineInstr *ARMBaseInstrInfo::duplicate(MachineInstr &Orig,
                                          MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
  switch (Orig.getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig.getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig.getOperand(1).setIndex(CPI);
    Orig.getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
                                        const MachineInstr &MI1,
                                        const MachineRegisterInfo *MRI) const {
  unsigned Opcode = MI0.getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::LDRLIT_ga_pcrel ||
      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
      Opcode == ARM::tLDRLIT_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0.getOperand(1);
    const MachineOperand &MO1 = MI1.getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::LDRLIT_ga_pcrel ||
        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
        Opcode == ARM::tLDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0.getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    unsigned Addr0 = MI0.getOperand(1).getReg();
    unsigned Addr1 = MI1.getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI ||
          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
          !TargetRegisterInfo::isVirtualRegister(Addr1))
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded values, e.g. a constantpool of a global address,
      // are the same.
      if (!produceSameValue(*Def0, *Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
      const MachineOperand &MO0 = MI0.getOperand(i);
      const MachineOperand &MO1 = MI1.getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only difference
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRBi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRBi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  // Check if the machine opcodes are different. If they are different
  // then we consider them to not be of the same base address,
  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other
  // LDRBi12.
1702 // In this case, they are considered to be the same because they are different 1703 // encoding forms of the same basic instruction. 1704 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && 1705 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && 1706 Load2->getMachineOpcode() == ARM::t2LDRBi12) || 1707 (Load1->getMachineOpcode() == ARM::t2LDRBi12 && 1708 Load2->getMachineOpcode() == ARM::t2LDRBi8))) 1709 return false; // FIXME: overly conservative? 1710 1711 // Four loads in a row should be sufficient. 1712 if (NumLoads >= 3) 1713 return false; 1714 1715 return true; 1716 } 1717 1718 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, 1719 const MachineBasicBlock *MBB, 1720 const MachineFunction &MF) const { 1721 // Debug info is never a scheduling boundary. It's necessary to be explicit 1722 // due to the special treatment of IT instructions below, otherwise a 1723 // dbg_value followed by an IT will result in the IT instruction being 1724 // considered a scheduling hazard, which is wrong. It should be the actual 1725 // instruction preceding the dbg_value instruction(s), just like it is 1726 // when debug info is not present. 1727 if (MI.isDebugValue()) 1728 return false; 1729 1730 // Terminators and labels can't be scheduled around. 1731 if (MI.isTerminator() || MI.isPosition()) 1732 return true; 1733 1734 // Treat the start of the IT block as a scheduling boundary, but schedule 1735 // t2IT along with all instructions following it. 1736 // FIXME: This is a big hammer. But the alternative is to add all potential 1737 // true and anti dependencies to IT block instructions as implicit operands 1738 // to the t2IT instruction. The added compile time and complexity does not 1739 // seem worth it. 1740 MachineBasicBlock::const_iterator I = MI; 1741 // Make sure to skip any dbg_value instructions 1742 while (++I != MBB->end() && I->isDebugValue()) 1743 ; 1744 if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 1745 return true; 1746 1747 // Don't attempt to schedule around any instruction that defines 1748 // a stack-oriented pointer, as it's unlikely to be profitable. This 1749 // saves compile time, because it doesn't require every single 1750 // stack slot reference to depend on the instruction that does the 1751 // modification. 1752 // Calls don't actually change the stack pointer, even if they have imp-defs. 1753 // No ARM calling conventions change the stack pointer. (X86 calling 1754 // conventions sometimes do). 1755 if (!MI.isCall() && MI.definesRegister(ARM::SP)) 1756 return true; 1757 1758 return false; 1759 } 1760 1761 bool ARMBaseInstrInfo:: 1762 isProfitableToIfCvt(MachineBasicBlock &MBB, 1763 unsigned NumCycles, unsigned ExtraPredCycles, 1764 BranchProbability Probability) const { 1765 if (!NumCycles) 1766 return false; 1767 1768 // If we are optimizing for size, see if the branch in the predecessor can be 1769 // lowered to cbn?z by the constant island lowering pass, and return false if 1770 // so. This results in a shorter instruction sequence. 
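  // Illustrative shape of the pattern checked for below (hand-written sketch,
  // not actual compiler output):
  //   tCMPi8 %rN, 0        ; low-register compare against zero
  //   t2Bcc  <target>, ne  ; Thumb2 conditional branch on the result
  // The constant island pass may later fuse such a pair into a single
  // CBZ/CBNZ, so if-converting here would give up that code-size win.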
1771 if (MBB.getParent()->getFunction()->optForSize()) { 1772 MachineBasicBlock *Pred = *MBB.pred_begin(); 1773 if (!Pred->empty()) { 1774 MachineInstr *LastMI = &*Pred->rbegin(); 1775 if (LastMI->getOpcode() == ARM::t2Bcc) { 1776 MachineBasicBlock::iterator CmpMI = LastMI; 1777 if (CmpMI != Pred->begin()) { 1778 --CmpMI; 1779 if (CmpMI->getOpcode() == ARM::tCMPi8 || 1780 CmpMI->getOpcode() == ARM::t2CMPri) { 1781 unsigned Reg = CmpMI->getOperand(0).getReg(); 1782 unsigned PredReg = 0; 1783 ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg); 1784 if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && 1785 isARMLowRegister(Reg)) 1786 return false; 1787 } 1788 } 1789 } 1790 } 1791 } 1792 1793 // Attempt to estimate the relative costs of predication versus branching. 1794 // Here we scale up each component of UnpredCost to avoid precision issue when 1795 // scaling NumCycles by Probability. 1796 const unsigned ScalingUpFactor = 1024; 1797 unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor); 1798 UnpredCost += ScalingUpFactor; // The branch itself 1799 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; 1800 1801 return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost; 1802 } 1803 1804 bool ARMBaseInstrInfo:: 1805 isProfitableToIfCvt(MachineBasicBlock &TMBB, 1806 unsigned TCycles, unsigned TExtra, 1807 MachineBasicBlock &FMBB, 1808 unsigned FCycles, unsigned FExtra, 1809 BranchProbability Probability) const { 1810 if (!TCycles || !FCycles) 1811 return false; 1812 1813 // Attempt to estimate the relative costs of predication versus branching. 1814 // Here we scale up each component of UnpredCost to avoid precision issue when 1815 // scaling TCycles/FCycles by Probability. 1816 const unsigned ScalingUpFactor = 1024; 1817 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); 1818 unsigned FUnpredCost = 1819 Probability.getCompl().scale(FCycles * ScalingUpFactor); 1820 unsigned UnpredCost = TUnpredCost + FUnpredCost; 1821 UnpredCost += 1 * ScalingUpFactor; // The branch itself 1822 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; 1823 1824 return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; 1825 } 1826 1827 bool 1828 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 1829 MachineBasicBlock &FMBB) const { 1830 // Reduce false anti-dependencies to let the target's out-of-order execution 1831 // engine do its thing. 1832 return Subtarget.isProfitableToUnpredicate(); 1833 } 1834 1835 /// getInstrPredicate - If instruction is predicated, returns its predicate 1836 /// condition, otherwise returns AL. It also returns the condition code 1837 /// register by reference. 
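/// For example (illustrative): for a predicated instruction carrying the
/// operands 'pred:eq, pred:%cpsr' this returns ARMCC::EQ and sets PredReg to
/// ARM::CPSR; for an instruction with no predicate operands it returns
/// ARMCC::AL and sets PredReg to 0.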
1838 ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, 1839 unsigned &PredReg) { 1840 int PIdx = MI.findFirstPredOperandIdx(); 1841 if (PIdx == -1) { 1842 PredReg = 0; 1843 return ARMCC::AL; 1844 } 1845 1846 PredReg = MI.getOperand(PIdx+1).getReg(); 1847 return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); 1848 } 1849 1850 unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { 1851 if (Opc == ARM::B) 1852 return ARM::Bcc; 1853 if (Opc == ARM::tB) 1854 return ARM::tBcc; 1855 if (Opc == ARM::t2B) 1856 return ARM::t2Bcc; 1857 1858 llvm_unreachable("Unknown unconditional branch opcode!"); 1859 } 1860 1861 MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, 1862 bool NewMI, 1863 unsigned OpIdx1, 1864 unsigned OpIdx2) const { 1865 switch (MI.getOpcode()) { 1866 case ARM::MOVCCr: 1867 case ARM::t2MOVCCr: { 1868 // MOVCC can be commuted by inverting the condition. 1869 unsigned PredReg = 0; 1870 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); 1871 // MOVCC AL can't be inverted. Shouldn't happen. 1872 if (CC == ARMCC::AL || PredReg != ARM::CPSR) 1873 return nullptr; 1874 MachineInstr *CommutedMI = 1875 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 1876 if (!CommutedMI) 1877 return nullptr; 1878 // After swapping the MOVCC operands, also invert the condition. 1879 CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx()) 1880 .setImm(ARMCC::getOppositeCondition(CC)); 1881 return CommutedMI; 1882 } 1883 } 1884 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 1885 } 1886 1887 /// Identify instructions that can be folded into a MOVCC instruction, and 1888 /// return the defining instruction. 1889 static MachineInstr *canFoldIntoMOVCC(unsigned Reg, 1890 const MachineRegisterInfo &MRI, 1891 const TargetInstrInfo *TII) { 1892 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1893 return nullptr; 1894 if (!MRI.hasOneNonDBGUse(Reg)) 1895 return nullptr; 1896 MachineInstr *MI = MRI.getVRegDef(Reg); 1897 if (!MI) 1898 return nullptr; 1899 // MI is folded into the MOVCC by predicating it. 1900 if (!MI->isPredicable()) 1901 return nullptr; 1902 // Check if MI has any non-dead defs or physreg uses. This also detects 1903 // predicated instructions which will be reading CPSR. 1904 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { 1905 const MachineOperand &MO = MI->getOperand(i); 1906 // Reject frame index operands, PEI can't handle the predicated pseudos. 1907 if (MO.isFI() || MO.isCPI() || MO.isJTI()) 1908 return nullptr; 1909 if (!MO.isReg()) 1910 continue; 1911 // MI can't have any tied operands, that would conflict with predication. 1912 if (MO.isTied()) 1913 return nullptr; 1914 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1915 return nullptr; 1916 if (MO.isDef() && !MO.isDead()) 1917 return nullptr; 1918 } 1919 bool DontMoveAcrossStores = true; 1920 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) 1921 return nullptr; 1922 return MI; 1923 } 1924 1925 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, 1926 SmallVectorImpl<MachineOperand> &Cond, 1927 unsigned &TrueOp, unsigned &FalseOp, 1928 bool &Optimizable) const { 1929 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && 1930 "Unknown select instruction"); 1931 // MOVCC operands: 1932 // 0: Def. 1933 // 1: True use. 1934 // 2: False use. 1935 // 3: Condition code. 1936 // 4: CPSR use. 
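  // Illustrative example (hand-written): for a select pseudo such as
  //   %dst = MOVCCr %a, %b, pred:ne, pred:%cpsr
  // the code below reports TrueOp = 1 (%a) and FalseOp = 2 (%b), and Cond
  // receives the condition-code operand followed by the CPSR use.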
  TrueOp = 1;
  FalseOp = 2;
  Cond.push_back(MI.getOperand(3));
  Cond.push_back(MI.getOperand(4));
  // We can always fold a def.
  Optimizable = true;
  return false;
}

MachineInstr *
ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
                                 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                                 bool PreferFalse) const {
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
  unsigned DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  // Create a new predicated version of DefMI.
  // Rfalse is the first use.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);

  // Copy all the DefMI operands, excluding its (null) predicate.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands();
       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
    NewMI.add(DefMI->getOperand(i));

  unsigned CondCode = MI.getOperand(3).getImm();
  if (Invert)
    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
  else
    NewMI.addImm(CondCode);
  NewMI.add(MI.getOperand(4));

  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
  if (NewMI->hasOptionalDef())
    NewMI.add(condCodeOp());

  // The output register value when the predicate is false is an implicit
  // register operand tied to the first def.
  // The tie makes the register allocator ensure the FalseReg is allocated the
  // same register as operand 0.
  FalseReg.setImplicit();
  NewMI.add(FalseReg);
  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
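///
/// For example (illustrative), convertAddSubFlagsOpcode(ARM::ADDSri) returns
/// ARM::ADDri via the table below; the flag-setting pseudo becomes the plain
/// opcode and the 'S' behaviour is expressed through the optional CPSR def.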
2019 struct AddSubFlagsOpcodePair { 2020 uint16_t PseudoOpc; 2021 uint16_t MachineOpc; 2022 }; 2023 2024 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 2025 {ARM::ADDSri, ARM::ADDri}, 2026 {ARM::ADDSrr, ARM::ADDrr}, 2027 {ARM::ADDSrsi, ARM::ADDrsi}, 2028 {ARM::ADDSrsr, ARM::ADDrsr}, 2029 2030 {ARM::SUBSri, ARM::SUBri}, 2031 {ARM::SUBSrr, ARM::SUBrr}, 2032 {ARM::SUBSrsi, ARM::SUBrsi}, 2033 {ARM::SUBSrsr, ARM::SUBrsr}, 2034 2035 {ARM::RSBSri, ARM::RSBri}, 2036 {ARM::RSBSrsi, ARM::RSBrsi}, 2037 {ARM::RSBSrsr, ARM::RSBrsr}, 2038 2039 {ARM::t2ADDSri, ARM::t2ADDri}, 2040 {ARM::t2ADDSrr, ARM::t2ADDrr}, 2041 {ARM::t2ADDSrs, ARM::t2ADDrs}, 2042 2043 {ARM::t2SUBSri, ARM::t2SUBri}, 2044 {ARM::t2SUBSrr, ARM::t2SUBrr}, 2045 {ARM::t2SUBSrs, ARM::t2SUBrs}, 2046 2047 {ARM::t2RSBSri, ARM::t2RSBri}, 2048 {ARM::t2RSBSrs, ARM::t2RSBrs}, 2049 }; 2050 2051 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { 2052 for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) 2053 if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) 2054 return AddSubFlagsOpcodeMap[i].MachineOpc; 2055 return 0; 2056 } 2057 2058 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, 2059 MachineBasicBlock::iterator &MBBI, 2060 const DebugLoc &dl, unsigned DestReg, 2061 unsigned BaseReg, int NumBytes, 2062 ARMCC::CondCodes Pred, unsigned PredReg, 2063 const ARMBaseInstrInfo &TII, 2064 unsigned MIFlags) { 2065 if (NumBytes == 0 && DestReg != BaseReg) { 2066 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) 2067 .addReg(BaseReg, RegState::Kill) 2068 .add(predOps(Pred, PredReg)) 2069 .add(condCodeOp()) 2070 .setMIFlags(MIFlags); 2071 return; 2072 } 2073 2074 bool isSub = NumBytes < 0; 2075 if (isSub) NumBytes = -NumBytes; 2076 2077 while (NumBytes) { 2078 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); 2079 unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); 2080 assert(ThisVal && "Didn't extract field correctly"); 2081 2082 // We will handle these bits from offset, clear them. 2083 NumBytes &= ~ThisVal; 2084 2085 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); 2086 2087 // Build the new ADD / SUB. 2088 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; 2089 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) 2090 .addReg(BaseReg, RegState::Kill) 2091 .addImm(ThisVal) 2092 .add(predOps(Pred, PredReg)) 2093 .add(condCodeOp()) 2094 .setMIFlags(MIFlags); 2095 BaseReg = DestReg; 2096 } 2097 } 2098 2099 bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, 2100 MachineFunction &MF, MachineInstr *MI, 2101 unsigned NumBytes) { 2102 // This optimisation potentially adds lots of load and store 2103 // micro-operations, it's only really a great benefit to code-size. 2104 if (!MF.getFunction()->optForMinSize()) 2105 return false; 2106 2107 // If only one register is pushed/popped, LLVM can use an LDR/STR 2108 // instead. We can't modify those so make sure we're dealing with an 2109 // instruction we understand. 
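  // Hand-written sketch of the transformation this function attempts (register
  // choices are illustrative only): an 8-byte SP adjustment sitting next to
  //   push {r4, r7, lr}
  // can be folded away by transferring two extra (dead/undef) registers,
  //   push {r2, r3, r4, r7, lr}
  // trading the separate SP-adjusting instruction for a longer register list.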
2110 bool IsPop = isPopOpcode(MI->getOpcode()); 2111 bool IsPush = isPushOpcode(MI->getOpcode()); 2112 if (!IsPush && !IsPop) 2113 return false; 2114 2115 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || 2116 MI->getOpcode() == ARM::VLDMDIA_UPD; 2117 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || 2118 MI->getOpcode() == ARM::tPOP || 2119 MI->getOpcode() == ARM::tPOP_RET; 2120 2121 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && 2122 MI->getOperand(1).getReg() == ARM::SP)) && 2123 "trying to fold sp update into non-sp-updating push/pop"); 2124 2125 // The VFP push & pop act on D-registers, so we can only fold an adjustment 2126 // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try 2127 // if this is violated. 2128 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) 2129 return false; 2130 2131 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ 2132 // pred) so the list starts at 4. Thumb1 starts after the predicate. 2133 int RegListIdx = IsT1PushPop ? 2 : 4; 2134 2135 // Calculate the space we'll need in terms of registers. 2136 unsigned RegsNeeded; 2137 const TargetRegisterClass *RegClass; 2138 if (IsVFPPushPop) { 2139 RegsNeeded = NumBytes / 8; 2140 RegClass = &ARM::DPRRegClass; 2141 } else { 2142 RegsNeeded = NumBytes / 4; 2143 RegClass = &ARM::GPRRegClass; 2144 } 2145 2146 // We're going to have to strip all list operands off before 2147 // re-adding them since the order matters, so save the existing ones 2148 // for later. 2149 SmallVector<MachineOperand, 4> RegList; 2150 2151 // We're also going to need the first register transferred by this 2152 // instruction, which won't necessarily be the first register in the list. 2153 unsigned FirstRegEnc = -1; 2154 2155 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); 2156 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { 2157 MachineOperand &MO = MI->getOperand(i); 2158 RegList.push_back(MO); 2159 2160 if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) 2161 FirstRegEnc = TRI->getEncodingValue(MO.getReg()); 2162 } 2163 2164 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); 2165 2166 // Now try to find enough space in the reglist to allocate NumBytes. 2167 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; 2168 --CurRegEnc) { 2169 unsigned CurReg = RegClass->getRegister(CurRegEnc); 2170 if (!IsPop) { 2171 // Pushing any register is completely harmless, mark the 2172 // register involved as undef since we don't care about it in 2173 // the slightest. 2174 RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, 2175 false, false, true)); 2176 --RegsNeeded; 2177 continue; 2178 } 2179 2180 // However, we can only pop an extra register if it's not live. For 2181 // registers live within the function we might clobber a return value 2182 // register; the other way a register can be live here is if it's 2183 // callee-saved. 2184 if (isCalleeSavedRegister(CurReg, CSRegs) || 2185 MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) != 2186 MachineBasicBlock::LQR_Dead) { 2187 // VFP pops don't allow holes in the register list, so any skip is fatal 2188 // for our transformation. GPR pops do, so we should just keep looking. 2189 if (IsVFPPushPop) 2190 return false; 2191 else 2192 continue; 2193 } 2194 2195 // Mark the unimportant registers as <def,dead> in the POP. 
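    // For the CreateReg call below the flags are, in MachineOperand::CreateReg
    // parameter order: isDef = true, isImp = false, isKill = false,
    // isDead = true, i.e. the popped value is defined but never read.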
    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
                                                true));
    --RegsNeeded;
  }

  if (RegsNeeded > 0)
    return false;

  // Finally we know we can profitably perform the optimisation so go
  // ahead: strip all existing registers off and add them back again
  // in the right order.
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
    MI->RemoveOperand(i);

  // Add the complete list back in.
  MachineInstrBuilder MIB(MF, &*MI);
  for (int i = RegList.size() - 1; i >= 0; --i)
    MIB.add(RegList[i]);

  return true;
}

bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12:
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    case ARMII::AddrMode2:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    case ARMII::AddrMode3:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
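      // (AddrMode4 is used by load/store-multiple and AddrMode6 by NEON
      // element/structure accesses; neither encoding has an immediate offset
      // field to fold into.)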
2294 return false; 2295 case ARMII::AddrMode5: 2296 ImmIdx = FrameRegIdx+1; 2297 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); 2298 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) 2299 InstrOffs *= -1; 2300 NumBits = 8; 2301 Scale = 4; 2302 break; 2303 default: 2304 llvm_unreachable("Unsupported addressing mode!"); 2305 } 2306 2307 Offset += InstrOffs * Scale; 2308 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); 2309 if (Offset < 0) { 2310 Offset = -Offset; 2311 isSub = true; 2312 } 2313 2314 // Attempt to fold address comp. if opcode has offset bits 2315 if (NumBits > 0) { 2316 // Common case: small offset, fits into instruction. 2317 MachineOperand &ImmOp = MI.getOperand(ImmIdx); 2318 int ImmedOffset = Offset / Scale; 2319 unsigned Mask = (1 << NumBits) - 1; 2320 if ((unsigned)Offset <= Mask * Scale) { 2321 // Replace the FrameIndex with sp 2322 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2323 // FIXME: When addrmode2 goes away, this will simplify (like the 2324 // T2 version), as the LDR.i12 versions don't need the encoding 2325 // tricks for the offset value. 2326 if (isSub) { 2327 if (AddrMode == ARMII::AddrMode_i12) 2328 ImmedOffset = -ImmedOffset; 2329 else 2330 ImmedOffset |= 1 << NumBits; 2331 } 2332 ImmOp.ChangeToImmediate(ImmedOffset); 2333 Offset = 0; 2334 return true; 2335 } 2336 2337 // Otherwise, it didn't fit. Pull in what we can to simplify the immed. 2338 ImmedOffset = ImmedOffset & Mask; 2339 if (isSub) { 2340 if (AddrMode == ARMII::AddrMode_i12) 2341 ImmedOffset = -ImmedOffset; 2342 else 2343 ImmedOffset |= 1 << NumBits; 2344 } 2345 ImmOp.ChangeToImmediate(ImmedOffset); 2346 Offset &= ~(Mask*Scale); 2347 } 2348 } 2349 2350 Offset = (isSub) ? -Offset : Offset; 2351 return Offset == 0; 2352 } 2353 2354 /// analyzeCompare - For a comparison instruction, return the source registers 2355 /// in SrcReg and SrcReg2 if having two register operands, and the value it 2356 /// compares against in CmpValue. Return true if the comparison instruction 2357 /// can be analyzed. 2358 bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, 2359 unsigned &SrcReg2, int &CmpMask, 2360 int &CmpValue) const { 2361 switch (MI.getOpcode()) { 2362 default: break; 2363 case ARM::CMPri: 2364 case ARM::t2CMPri: 2365 case ARM::tCMPi8: 2366 SrcReg = MI.getOperand(0).getReg(); 2367 SrcReg2 = 0; 2368 CmpMask = ~0; 2369 CmpValue = MI.getOperand(1).getImm(); 2370 return true; 2371 case ARM::CMPrr: 2372 case ARM::t2CMPrr: 2373 SrcReg = MI.getOperand(0).getReg(); 2374 SrcReg2 = MI.getOperand(1).getReg(); 2375 CmpMask = ~0; 2376 CmpValue = 0; 2377 return true; 2378 case ARM::TSTri: 2379 case ARM::t2TSTri: 2380 SrcReg = MI.getOperand(0).getReg(); 2381 SrcReg2 = 0; 2382 CmpMask = MI.getOperand(1).getImm(); 2383 CmpValue = 0; 2384 return true; 2385 } 2386 2387 return false; 2388 } 2389 2390 /// isSuitableForMask - Identify a suitable 'and' instruction that 2391 /// operates on the given source register and applies the same mask 2392 /// as a 'tst' instruction. Provide a limited look-through for copies. 2393 /// When successful, MI will hold the found instruction. 2394 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, 2395 int CmpMask, bool CommonUse) { 2396 switch (MI->getOpcode()) { 2397 case ARM::ANDri: 2398 case ARM::t2ANDri: 2399 if (CmpMask != MI->getOperand(2).getImm()) 2400 return false; 2401 if (SrcReg == MI->getOperand(CommonUse ? 
1 : 0).getReg()) 2402 return true; 2403 break; 2404 } 2405 2406 return false; 2407 } 2408 2409 /// getSwappedCondition - assume the flags are set by MI(a,b), return 2410 /// the condition code if we modify the instructions such that flags are 2411 /// set by MI(b,a). 2412 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { 2413 switch (CC) { 2414 default: return ARMCC::AL; 2415 case ARMCC::EQ: return ARMCC::EQ; 2416 case ARMCC::NE: return ARMCC::NE; 2417 case ARMCC::HS: return ARMCC::LS; 2418 case ARMCC::LO: return ARMCC::HI; 2419 case ARMCC::HI: return ARMCC::LO; 2420 case ARMCC::LS: return ARMCC::HS; 2421 case ARMCC::GE: return ARMCC::LE; 2422 case ARMCC::LT: return ARMCC::GT; 2423 case ARMCC::GT: return ARMCC::LT; 2424 case ARMCC::LE: return ARMCC::GE; 2425 } 2426 } 2427 2428 /// isRedundantFlagInstr - check whether the first instruction, whose only 2429 /// purpose is to update flags, can be made redundant. 2430 /// CMPrr can be made redundant by SUBrr if the operands are the same. 2431 /// CMPri can be made redundant by SUBri if the operands are the same. 2432 /// This function can be extended later on. 2433 inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, 2434 unsigned SrcReg2, int ImmValue, 2435 MachineInstr *OI) { 2436 if ((CmpI->getOpcode() == ARM::CMPrr || 2437 CmpI->getOpcode() == ARM::t2CMPrr) && 2438 (OI->getOpcode() == ARM::SUBrr || 2439 OI->getOpcode() == ARM::t2SUBrr) && 2440 ((OI->getOperand(1).getReg() == SrcReg && 2441 OI->getOperand(2).getReg() == SrcReg2) || 2442 (OI->getOperand(1).getReg() == SrcReg2 && 2443 OI->getOperand(2).getReg() == SrcReg))) 2444 return true; 2445 2446 if ((CmpI->getOpcode() == ARM::CMPri || 2447 CmpI->getOpcode() == ARM::t2CMPri) && 2448 (OI->getOpcode() == ARM::SUBri || 2449 OI->getOpcode() == ARM::t2SUBri) && 2450 OI->getOperand(1).getReg() == SrcReg && 2451 OI->getOperand(2).getImm() == ImmValue) 2452 return true; 2453 return false; 2454 } 2455 2456 static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { 2457 switch (MI->getOpcode()) { 2458 default: return false; 2459 case ARM::tLSLri: 2460 case ARM::tLSRri: 2461 case ARM::tLSLrr: 2462 case ARM::tLSRrr: 2463 case ARM::tSUBrr: 2464 case ARM::tADDrr: 2465 case ARM::tADDi3: 2466 case ARM::tADDi8: 2467 case ARM::tSUBi3: 2468 case ARM::tSUBi8: 2469 case ARM::tMUL: 2470 IsThumb1 = true; 2471 LLVM_FALLTHROUGH; 2472 case ARM::RSBrr: 2473 case ARM::RSBri: 2474 case ARM::RSCrr: 2475 case ARM::RSCri: 2476 case ARM::ADDrr: 2477 case ARM::ADDri: 2478 case ARM::ADCrr: 2479 case ARM::ADCri: 2480 case ARM::SUBrr: 2481 case ARM::SUBri: 2482 case ARM::SBCrr: 2483 case ARM::SBCri: 2484 case ARM::t2RSBri: 2485 case ARM::t2ADDrr: 2486 case ARM::t2ADDri: 2487 case ARM::t2ADCrr: 2488 case ARM::t2ADCri: 2489 case ARM::t2SUBrr: 2490 case ARM::t2SUBri: 2491 case ARM::t2SBCrr: 2492 case ARM::t2SBCri: 2493 case ARM::ANDrr: 2494 case ARM::ANDri: 2495 case ARM::t2ANDrr: 2496 case ARM::t2ANDri: 2497 case ARM::ORRrr: 2498 case ARM::ORRri: 2499 case ARM::t2ORRrr: 2500 case ARM::t2ORRri: 2501 case ARM::EORrr: 2502 case ARM::EORri: 2503 case ARM::t2EORrr: 2504 case ARM::t2EORri: 2505 case ARM::t2LSRri: 2506 case ARM::t2LSRrr: 2507 case ARM::t2LSLri: 2508 case ARM::t2LSLrr: 2509 return true; 2510 } 2511 } 2512 2513 /// optimizeCompareInstr - Convert the instruction supplying the argument to the 2514 /// comparison into one that sets the zero bit in the flags register; 2515 /// Remove a redundant Compare instruction if an earlier instruction can set the 2516 
/// flags in the same way as Compare. 2517 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two 2518 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the 2519 /// condition code of instructions which use the flags. 2520 bool ARMBaseInstrInfo::optimizeCompareInstr( 2521 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, 2522 int CmpValue, const MachineRegisterInfo *MRI) const { 2523 // Get the unique definition of SrcReg. 2524 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 2525 if (!MI) return false; 2526 2527 // Masked compares sometimes use the same register as the corresponding 'and'. 2528 if (CmpMask != ~0) { 2529 if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) { 2530 MI = nullptr; 2531 for (MachineRegisterInfo::use_instr_iterator 2532 UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); 2533 UI != UE; ++UI) { 2534 if (UI->getParent() != CmpInstr.getParent()) 2535 continue; 2536 MachineInstr *PotentialAND = &*UI; 2537 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || 2538 isPredicated(*PotentialAND)) 2539 continue; 2540 MI = PotentialAND; 2541 break; 2542 } 2543 if (!MI) return false; 2544 } 2545 } 2546 2547 // Get ready to iterate backward from CmpInstr. 2548 MachineBasicBlock::iterator I = CmpInstr, E = MI, 2549 B = CmpInstr.getParent()->begin(); 2550 2551 // Early exit if CmpInstr is at the beginning of the BB. 2552 if (I == B) return false; 2553 2554 // There are two possible candidates which can be changed to set CPSR: 2555 // One is MI, the other is a SUB instruction. 2556 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). 2557 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). 2558 MachineInstr *Sub = nullptr; 2559 if (SrcReg2 != 0) 2560 // MI is not a candidate for CMPrr. 2561 MI = nullptr; 2562 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { 2563 // Conservatively refuse to convert an instruction which isn't in the same 2564 // BB as the comparison. 2565 // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. 2566 // Thus we cannot return here. 2567 if (CmpInstr.getOpcode() == ARM::CMPri || 2568 CmpInstr.getOpcode() == ARM::t2CMPri) 2569 MI = nullptr; 2570 else 2571 return false; 2572 } 2573 2574 bool IsThumb1 = false; 2575 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) 2576 return false; 2577 2578 // We also want to do this peephole for cases like this: if (a*b == 0), 2579 // and optimise away the CMP instruction from the generated code sequence: 2580 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values 2581 // resulting from the select instruction, but these MOVS instructions for 2582 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. 2583 // However, if we only have MOVS instructions in between the CMP and the 2584 // other instruction (the MULS in this example), then the CPSR is dead so we 2585 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this 2586 // reordering and then continue the analysis hoping we can eliminate the 2587 // CMP. This peephole works on the vregs, so is still in SSA form. As a 2588 // consequence, the movs won't redefine/kill the MUL operands which would 2589 // make this reordering illegal. 
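  // Hand-written illustration of the reordering described above (Thumb1):
  //   %4 = tMUL %1, %2    ; flag-setting multiply feeding the compare
  //   %5 = tMOVi8 0       ; flag-setting moves materialising booleans
  //   %6 = tMOVi8 1
  //   tCMPi8 %4, 0
  // is rewritten so the tMOVi8s come first and the tMUL sits directly before
  // the tCMPi8, after which the compare may become removable because the
  // multiply already set CPSR.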
2590 if (MI && IsThumb1) { 2591 --I; 2592 bool CanReorder = true; 2593 const bool HasStmts = I != E; 2594 for (; I != E; --I) { 2595 if (I->getOpcode() != ARM::tMOVi8) { 2596 CanReorder = false; 2597 break; 2598 } 2599 } 2600 if (HasStmts && CanReorder) { 2601 MI = MI->removeFromParent(); 2602 E = CmpInstr; 2603 CmpInstr.getParent()->insert(E, MI); 2604 } 2605 I = CmpInstr; 2606 E = MI; 2607 } 2608 2609 // Check that CPSR isn't set between the comparison instruction and the one we 2610 // want to change. At the same time, search for Sub. 2611 const TargetRegisterInfo *TRI = &getRegisterInfo(); 2612 --I; 2613 for (; I != E; --I) { 2614 const MachineInstr &Instr = *I; 2615 2616 if (Instr.modifiesRegister(ARM::CPSR, TRI) || 2617 Instr.readsRegister(ARM::CPSR, TRI)) 2618 // This instruction modifies or uses CPSR after the one we want to 2619 // change. We can't do this transformation. 2620 return false; 2621 2622 // Check whether CmpInstr can be made redundant by the current instruction. 2623 if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { 2624 Sub = &*I; 2625 break; 2626 } 2627 2628 if (I == B) 2629 // The 'and' is below the comparison instruction. 2630 return false; 2631 } 2632 2633 // Return false if no candidates exist. 2634 if (!MI && !Sub) 2635 return false; 2636 2637 // The single candidate is called MI. 2638 if (!MI) MI = Sub; 2639 2640 // We can't use a predicated instruction - it doesn't always write the flags. 2641 if (isPredicated(*MI)) 2642 return false; 2643 2644 // Scan forward for the use of CPSR 2645 // When checking against MI: if it's a conditional code that requires 2646 // checking of the V bit or C bit, then this is not safe to do. 2647 // It is safe to remove CmpInstr if CPSR is redefined or killed. 2648 // If we are done with the basic block, we need to check whether CPSR is 2649 // live-out. 2650 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> 2651 OperandsToUpdate; 2652 bool isSafe = false; 2653 I = CmpInstr; 2654 E = CmpInstr.getParent()->end(); 2655 while (!isSafe && ++I != E) { 2656 const MachineInstr &Instr = *I; 2657 for (unsigned IO = 0, EO = Instr.getNumOperands(); 2658 !isSafe && IO != EO; ++IO) { 2659 const MachineOperand &MO = Instr.getOperand(IO); 2660 if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { 2661 isSafe = true; 2662 break; 2663 } 2664 if (!MO.isReg() || MO.getReg() != ARM::CPSR) 2665 continue; 2666 if (MO.isDef()) { 2667 isSafe = true; 2668 break; 2669 } 2670 // Condition code is after the operand before CPSR except for VSELs. 2671 ARMCC::CondCodes CC; 2672 bool IsInstrVSel = true; 2673 switch (Instr.getOpcode()) { 2674 default: 2675 IsInstrVSel = false; 2676 CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); 2677 break; 2678 case ARM::VSELEQD: 2679 case ARM::VSELEQS: 2680 CC = ARMCC::EQ; 2681 break; 2682 case ARM::VSELGTD: 2683 case ARM::VSELGTS: 2684 CC = ARMCC::GT; 2685 break; 2686 case ARM::VSELGED: 2687 case ARM::VSELGES: 2688 CC = ARMCC::GE; 2689 break; 2690 case ARM::VSELVSS: 2691 case ARM::VSELVSD: 2692 CC = ARMCC::VS; 2693 break; 2694 } 2695 2696 if (Sub) { 2697 ARMCC::CondCodes NewCC = getSwappedCondition(CC); 2698 if (NewCC == ARMCC::AL) 2699 return false; 2700 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based 2701 // on CMP needs to be updated to be based on SUB. 2702 // Push the condition code operands to OperandsToUpdate. 2703 // If it is safe to remove CmpInstr, the condition code of these 2704 // operands will be modified. 
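        // Hand-written illustration: for SUBrr(%1, %2) paired with
        // CMPrr(%2, %1), a flag user such as MOVgt must become MOVlt once the
        // compare is folded onto the subtract (see getSwappedCondition above).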
2705 if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && 2706 Sub->getOperand(2).getReg() == SrcReg) { 2707 // VSel doesn't support condition code update. 2708 if (IsInstrVSel) 2709 return false; 2710 OperandsToUpdate.push_back( 2711 std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); 2712 } 2713 } else { 2714 // No Sub, so this is x = <op> y, z; cmp x, 0. 2715 switch (CC) { 2716 case ARMCC::EQ: // Z 2717 case ARMCC::NE: // Z 2718 case ARMCC::MI: // N 2719 case ARMCC::PL: // N 2720 case ARMCC::AL: // none 2721 // CPSR can be used multiple times, we should continue. 2722 break; 2723 case ARMCC::HS: // C 2724 case ARMCC::LO: // C 2725 case ARMCC::VS: // V 2726 case ARMCC::VC: // V 2727 case ARMCC::HI: // C Z 2728 case ARMCC::LS: // C Z 2729 case ARMCC::GE: // N V 2730 case ARMCC::LT: // N V 2731 case ARMCC::GT: // Z N V 2732 case ARMCC::LE: // Z N V 2733 // The instruction uses the V bit or C bit which is not safe. 2734 return false; 2735 } 2736 } 2737 } 2738 } 2739 2740 // If CPSR is not killed nor re-defined, we should check whether it is 2741 // live-out. If it is live-out, do not optimize. 2742 if (!isSafe) { 2743 MachineBasicBlock *MBB = CmpInstr.getParent(); 2744 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 2745 SE = MBB->succ_end(); SI != SE; ++SI) 2746 if ((*SI)->isLiveIn(ARM::CPSR)) 2747 return false; 2748 } 2749 2750 // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always 2751 // set CPSR so this is represented as an explicit output) 2752 if (!IsThumb1) { 2753 MI->getOperand(5).setReg(ARM::CPSR); 2754 MI->getOperand(5).setIsDef(true); 2755 } 2756 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); 2757 CmpInstr.eraseFromParent(); 2758 2759 // Modify the condition code of operands in OperandsToUpdate. 2760 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to 2761 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 2762 for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) 2763 OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); 2764 2765 return true; 2766 } 2767 2768 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, 2769 unsigned Reg, 2770 MachineRegisterInfo *MRI) const { 2771 // Fold large immediates into add, sub, or, xor. 2772 unsigned DefOpc = DefMI.getOpcode(); 2773 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) 2774 return false; 2775 if (!DefMI.getOperand(1).isImm()) 2776 // Could be t2MOVi32imm <ga:xx> 2777 return false; 2778 2779 if (!MRI->hasOneNonDBGUse(Reg)) 2780 return false; 2781 2782 const MCInstrDesc &DefMCID = DefMI.getDesc(); 2783 if (DefMCID.hasOptionalDef()) { 2784 unsigned NumOps = DefMCID.getNumOperands(); 2785 const MachineOperand &MO = DefMI.getOperand(NumOps - 1); 2786 if (MO.getReg() == ARM::CPSR && !MO.isDead()) 2787 // If DefMI defines CPSR and it is not dead, it's obviously not safe 2788 // to delete DefMI. 2789 return false; 2790 } 2791 2792 const MCInstrDesc &UseMCID = UseMI.getDesc(); 2793 if (UseMCID.hasOptionalDef()) { 2794 unsigned NumOps = UseMCID.getNumOperands(); 2795 if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR) 2796 // If the instruction sets the flag, do not attempt this optimization 2797 // since it may change the semantics of the code. 
2798 return false; 2799 } 2800 2801 unsigned UseOpc = UseMI.getOpcode(); 2802 unsigned NewUseOpc = 0; 2803 uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm(); 2804 uint32_t SOImmValV1 = 0, SOImmValV2 = 0; 2805 bool Commute = false; 2806 switch (UseOpc) { 2807 default: return false; 2808 case ARM::SUBrr: 2809 case ARM::ADDrr: 2810 case ARM::ORRrr: 2811 case ARM::EORrr: 2812 case ARM::t2SUBrr: 2813 case ARM::t2ADDrr: 2814 case ARM::t2ORRrr: 2815 case ARM::t2EORrr: { 2816 Commute = UseMI.getOperand(2).getReg() != Reg; 2817 switch (UseOpc) { 2818 default: break; 2819 case ARM::ADDrr: 2820 case ARM::SUBrr: 2821 if (UseOpc == ARM::SUBrr && Commute) 2822 return false; 2823 2824 // ADD/SUB are special because they're essentially the same operation, so 2825 // we can handle a larger range of immediates. 2826 if (ARM_AM::isSOImmTwoPartVal(ImmVal)) 2827 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; 2828 else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) { 2829 ImmVal = -ImmVal; 2830 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; 2831 } else 2832 return false; 2833 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); 2834 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); 2835 break; 2836 case ARM::ORRrr: 2837 case ARM::EORrr: 2838 if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) 2839 return false; 2840 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); 2841 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); 2842 switch (UseOpc) { 2843 default: break; 2844 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; 2845 case ARM::EORrr: NewUseOpc = ARM::EORri; break; 2846 } 2847 break; 2848 case ARM::t2ADDrr: 2849 case ARM::t2SUBrr: 2850 if (UseOpc == ARM::t2SUBrr && Commute) 2851 return false; 2852 2853 // ADD/SUB are special because they're essentially the same operation, so 2854 // we can handle a larger range of immediates. 2855 if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) 2856 NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri; 2857 else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { 2858 ImmVal = -ImmVal; 2859 NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri; 2860 } else 2861 return false; 2862 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); 2863 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); 2864 break; 2865 case ARM::t2ORRrr: 2866 case ARM::t2EORrr: 2867 if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) 2868 return false; 2869 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); 2870 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); 2871 switch (UseOpc) { 2872 default: break; 2873 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; 2874 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; 2875 } 2876 break; 2877 } 2878 } 2879 } 2880 2881 unsigned OpIdx = Commute ? 
2 : 1; 2882 unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); 2883 bool isKill = UseMI.getOperand(OpIdx).isKill(); 2884 unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); 2885 BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), 2886 NewReg) 2887 .addReg(Reg1, getKillRegState(isKill)) 2888 .addImm(SOImmValV1) 2889 .add(predOps(ARMCC::AL)) 2890 .add(condCodeOp()); 2891 UseMI.setDesc(get(NewUseOpc)); 2892 UseMI.getOperand(1).setReg(NewReg); 2893 UseMI.getOperand(1).setIsKill(); 2894 UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); 2895 DefMI.eraseFromParent(); 2896 return true; 2897 } 2898 2899 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, 2900 const MachineInstr &MI) { 2901 switch (MI.getOpcode()) { 2902 default: { 2903 const MCInstrDesc &Desc = MI.getDesc(); 2904 int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); 2905 assert(UOps >= 0 && "bad # UOps"); 2906 return UOps; 2907 } 2908 2909 case ARM::LDRrs: 2910 case ARM::LDRBrs: 2911 case ARM::STRrs: 2912 case ARM::STRBrs: { 2913 unsigned ShOpVal = MI.getOperand(3).getImm(); 2914 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 2915 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2916 if (!isSub && 2917 (ShImm == 0 || 2918 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 2919 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 2920 return 1; 2921 return 2; 2922 } 2923 2924 case ARM::LDRH: 2925 case ARM::STRH: { 2926 if (!MI.getOperand(2).getReg()) 2927 return 1; 2928 2929 unsigned ShOpVal = MI.getOperand(3).getImm(); 2930 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 2931 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2932 if (!isSub && 2933 (ShImm == 0 || 2934 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 2935 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 2936 return 1; 2937 return 2; 2938 } 2939 2940 case ARM::LDRSB: 2941 case ARM::LDRSH: 2942 return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2; 2943 2944 case ARM::LDRSB_POST: 2945 case ARM::LDRSH_POST: { 2946 unsigned Rt = MI.getOperand(0).getReg(); 2947 unsigned Rm = MI.getOperand(3).getReg(); 2948 return (Rt == Rm) ? 4 : 3; 2949 } 2950 2951 case ARM::LDR_PRE_REG: 2952 case ARM::LDRB_PRE_REG: { 2953 unsigned Rt = MI.getOperand(0).getReg(); 2954 unsigned Rm = MI.getOperand(3).getReg(); 2955 if (Rt == Rm) 2956 return 3; 2957 unsigned ShOpVal = MI.getOperand(4).getImm(); 2958 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 2959 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2960 if (!isSub && 2961 (ShImm == 0 || 2962 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 2963 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 2964 return 2; 2965 return 3; 2966 } 2967 2968 case ARM::STR_PRE_REG: 2969 case ARM::STRB_PRE_REG: { 2970 unsigned ShOpVal = MI.getOperand(4).getImm(); 2971 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 2972 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2973 if (!isSub && 2974 (ShImm == 0 || 2975 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 2976 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 2977 return 2; 2978 return 3; 2979 } 2980 2981 case ARM::LDRH_PRE: 2982 case ARM::STRH_PRE: { 2983 unsigned Rt = MI.getOperand(0).getReg(); 2984 unsigned Rm = MI.getOperand(3).getReg(); 2985 if (!Rm) 2986 return 2; 2987 if (Rt == Rm) 2988 return 3; 2989 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 
3 : 2; 2990 } 2991 2992 case ARM::LDR_POST_REG: 2993 case ARM::LDRB_POST_REG: 2994 case ARM::LDRH_POST: { 2995 unsigned Rt = MI.getOperand(0).getReg(); 2996 unsigned Rm = MI.getOperand(3).getReg(); 2997 return (Rt == Rm) ? 3 : 2; 2998 } 2999 3000 case ARM::LDR_PRE_IMM: 3001 case ARM::LDRB_PRE_IMM: 3002 case ARM::LDR_POST_IMM: 3003 case ARM::LDRB_POST_IMM: 3004 case ARM::STRB_POST_IMM: 3005 case ARM::STRB_POST_REG: 3006 case ARM::STRB_PRE_IMM: 3007 case ARM::STRH_POST: 3008 case ARM::STR_POST_IMM: 3009 case ARM::STR_POST_REG: 3010 case ARM::STR_PRE_IMM: 3011 return 2; 3012 3013 case ARM::LDRSB_PRE: 3014 case ARM::LDRSH_PRE: { 3015 unsigned Rm = MI.getOperand(3).getReg(); 3016 if (Rm == 0) 3017 return 3; 3018 unsigned Rt = MI.getOperand(0).getReg(); 3019 if (Rt == Rm) 3020 return 4; 3021 unsigned ShOpVal = MI.getOperand(4).getImm(); 3022 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3023 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3024 if (!isSub && 3025 (ShImm == 0 || 3026 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3027 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3028 return 3; 3029 return 4; 3030 } 3031 3032 case ARM::LDRD: { 3033 unsigned Rt = MI.getOperand(0).getReg(); 3034 unsigned Rn = MI.getOperand(2).getReg(); 3035 unsigned Rm = MI.getOperand(3).getReg(); 3036 if (Rm) 3037 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 3038 : 3; 3039 return (Rt == Rn) ? 3 : 2; 3040 } 3041 3042 case ARM::STRD: { 3043 unsigned Rm = MI.getOperand(3).getReg(); 3044 if (Rm) 3045 return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 3046 : 3; 3047 return 2; 3048 } 3049 3050 case ARM::LDRD_POST: 3051 case ARM::t2LDRD_POST: 3052 return 3; 3053 3054 case ARM::STRD_POST: 3055 case ARM::t2STRD_POST: 3056 return 4; 3057 3058 case ARM::LDRD_PRE: { 3059 unsigned Rt = MI.getOperand(0).getReg(); 3060 unsigned Rn = MI.getOperand(3).getReg(); 3061 unsigned Rm = MI.getOperand(4).getReg(); 3062 if (Rm) 3063 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 3064 : 4; 3065 return (Rt == Rn) ? 4 : 3; 3066 } 3067 3068 case ARM::t2LDRD_PRE: { 3069 unsigned Rt = MI.getOperand(0).getReg(); 3070 unsigned Rn = MI.getOperand(3).getReg(); 3071 return (Rt == Rn) ? 4 : 3; 3072 } 3073 3074 case ARM::STRD_PRE: { 3075 unsigned Rm = MI.getOperand(4).getReg(); 3076 if (Rm) 3077 return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 3078 : 4; 3079 return 3; 3080 } 3081 3082 case ARM::t2STRD_PRE: 3083 return 3; 3084 3085 case ARM::t2LDR_POST: 3086 case ARM::t2LDRB_POST: 3087 case ARM::t2LDRB_PRE: 3088 case ARM::t2LDRSBi12: 3089 case ARM::t2LDRSBi8: 3090 case ARM::t2LDRSBpci: 3091 case ARM::t2LDRSBs: 3092 case ARM::t2LDRH_POST: 3093 case ARM::t2LDRH_PRE: 3094 case ARM::t2LDRSBT: 3095 case ARM::t2LDRSB_POST: 3096 case ARM::t2LDRSB_PRE: 3097 case ARM::t2LDRSH_POST: 3098 case ARM::t2LDRSH_PRE: 3099 case ARM::t2LDRSHi12: 3100 case ARM::t2LDRSHi8: 3101 case ARM::t2LDRSHpci: 3102 case ARM::t2LDRSHs: 3103 return 2; 3104 3105 case ARM::t2LDRDi8: { 3106 unsigned Rt = MI.getOperand(0).getReg(); 3107 unsigned Rn = MI.getOperand(2).getReg(); 3108 return (Rt == Rn) ? 3 : 2; 3109 } 3110 3111 case ARM::t2STRB_POST: 3112 case ARM::t2STRB_PRE: 3113 case ARM::t2STRBs: 3114 case ARM::t2STRDi8: 3115 case ARM::t2STRH_POST: 3116 case ARM::t2STRH_PRE: 3117 case ARM::t2STRHs: 3118 case ARM::t2STR_POST: 3119 case ARM::t2STR_PRE: 3120 case ARM::t2STRs: 3121 return 2; 3122 } 3123 } 3124 3125 // Return the number of 32-bit words loaded by LDM or stored by STM. 
If this 3126 // can't be easily determined return 0 (missing MachineMemOperand). 3127 // 3128 // FIXME: The current MachineInstr design does not support relying on machine 3129 // mem operands to determine the width of a memory access. Instead, we expect 3130 // the target to provide this information based on the instruction opcode and 3131 // operands. However, using MachineMemOperand is the best solution now for 3132 // two reasons: 3133 // 3134 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI 3135 // operands. This is much more dangerous than using the MachineMemOperand 3136 // sizes because CodeGen passes can insert/remove optional machine operands. In 3137 // fact, it's totally incorrect for preRA passes and appears to be wrong for 3138 // postRA passes as well. 3139 // 3140 // 2) getNumLDMAddresses is only used by the scheduling machine model and any 3141 // machine model that calls this should handle the unknown (zero size) case. 3142 // 3143 // Long term, we should require a target hook that verifies MachineMemOperand 3144 // sizes during MC lowering. That target hook should be local to MC lowering 3145 // because we can't ensure that it is aware of other MI forms. Doing this will 3146 // ensure that MachineMemOperands are correctly propagated through all passes. 3147 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { 3148 unsigned Size = 0; 3149 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 3150 E = MI.memoperands_end(); 3151 I != E; ++I) { 3152 Size += (*I)->getSize(); 3153 } 3154 return Size / 4; 3155 } 3156 3157 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, 3158 unsigned NumRegs) { 3159 unsigned UOps = 1 + NumRegs; // 1 for address computation. 3160 switch (Opc) { 3161 default: 3162 break; 3163 case ARM::VLDMDIA_UPD: 3164 case ARM::VLDMDDB_UPD: 3165 case ARM::VLDMSIA_UPD: 3166 case ARM::VLDMSDB_UPD: 3167 case ARM::VSTMDIA_UPD: 3168 case ARM::VSTMDDB_UPD: 3169 case ARM::VSTMSIA_UPD: 3170 case ARM::VSTMSDB_UPD: 3171 case ARM::LDMIA_UPD: 3172 case ARM::LDMDA_UPD: 3173 case ARM::LDMDB_UPD: 3174 case ARM::LDMIB_UPD: 3175 case ARM::STMIA_UPD: 3176 case ARM::STMDA_UPD: 3177 case ARM::STMDB_UPD: 3178 case ARM::STMIB_UPD: 3179 case ARM::tLDMIA_UPD: 3180 case ARM::tSTMIA_UPD: 3181 case ARM::t2LDMIA_UPD: 3182 case ARM::t2LDMDB_UPD: 3183 case ARM::t2STMIA_UPD: 3184 case ARM::t2STMDB_UPD: 3185 ++UOps; // One for base register writeback. 3186 break; 3187 case ARM::LDMIA_RET: 3188 case ARM::tPOP_RET: 3189 case ARM::t2LDMIA_RET: 3190 UOps += 2; // One for base reg wb, one for write to pc. 3191 break; 3192 } 3193 return UOps; 3194 } 3195 3196 unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, 3197 const MachineInstr &MI) const { 3198 if (!ItinData || ItinData->isEmpty()) 3199 return 1; 3200 3201 const MCInstrDesc &Desc = MI.getDesc(); 3202 unsigned Class = Desc.getSchedClass(); 3203 int ItinUOps = ItinData->getNumMicroOps(Class); 3204 if (ItinUOps >= 0) { 3205 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) 3206 return getNumMicroOpsSwiftLdSt(ItinData, MI); 3207 3208 return ItinUOps; 3209 } 3210 3211 unsigned Opc = MI.getOpcode(); 3212 switch (Opc) { 3213 default: 3214 llvm_unreachable("Unexpected multi-uops instruction!"); 3215 case ARM::VLDMQIA: 3216 case ARM::VSTMQIA: 3217 return 2; 3218 3219 // The number of uOps for load / store multiple are determined by the number 3220 // registers. 
3221 // 3222 // On Cortex-A8, each pair of register loads / stores can be scheduled on the 3223 // same cycle. The scheduling for the first load / store must be done 3224 // separately by assuming the address is not 64-bit aligned. 3225 // 3226 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address 3227 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON 3228 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. 3229 case ARM::VLDMDIA: 3230 case ARM::VLDMDIA_UPD: 3231 case ARM::VLDMDDB_UPD: 3232 case ARM::VLDMSIA: 3233 case ARM::VLDMSIA_UPD: 3234 case ARM::VLDMSDB_UPD: 3235 case ARM::VSTMDIA: 3236 case ARM::VSTMDIA_UPD: 3237 case ARM::VSTMDDB_UPD: 3238 case ARM::VSTMSIA: 3239 case ARM::VSTMSIA_UPD: 3240 case ARM::VSTMSDB_UPD: { 3241 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); 3242 return (NumRegs / 2) + (NumRegs % 2) + 1; 3243 } 3244 3245 case ARM::LDMIA_RET: 3246 case ARM::LDMIA: 3247 case ARM::LDMDA: 3248 case ARM::LDMDB: 3249 case ARM::LDMIB: 3250 case ARM::LDMIA_UPD: 3251 case ARM::LDMDA_UPD: 3252 case ARM::LDMDB_UPD: 3253 case ARM::LDMIB_UPD: 3254 case ARM::STMIA: 3255 case ARM::STMDA: 3256 case ARM::STMDB: 3257 case ARM::STMIB: 3258 case ARM::STMIA_UPD: 3259 case ARM::STMDA_UPD: 3260 case ARM::STMDB_UPD: 3261 case ARM::STMIB_UPD: 3262 case ARM::tLDMIA: 3263 case ARM::tLDMIA_UPD: 3264 case ARM::tSTMIA_UPD: 3265 case ARM::tPOP_RET: 3266 case ARM::tPOP: 3267 case ARM::tPUSH: 3268 case ARM::t2LDMIA_RET: 3269 case ARM::t2LDMIA: 3270 case ARM::t2LDMDB: 3271 case ARM::t2LDMIA_UPD: 3272 case ARM::t2LDMDB_UPD: 3273 case ARM::t2STMIA: 3274 case ARM::t2STMDB: 3275 case ARM::t2STMIA_UPD: 3276 case ARM::t2STMDB_UPD: { 3277 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; 3278 switch (Subtarget.getLdStMultipleTiming()) { 3279 case ARMSubtarget::SingleIssuePlusExtras: 3280 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); 3281 case ARMSubtarget::SingleIssue: 3282 // Assume the worst. 3283 return NumRegs; 3284 case ARMSubtarget::DoubleIssue: { 3285 if (NumRegs < 4) 3286 return 2; 3287 // 4 registers would be issued: 2, 2. 3288 // 5 registers would be issued: 2, 2, 1. 3289 unsigned UOps = (NumRegs / 2); 3290 if (NumRegs % 2) 3291 ++UOps; 3292 return UOps; 3293 } 3294 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { 3295 unsigned UOps = (NumRegs / 2); 3296 // If there are odd number of registers or if it's not 64-bit aligned, 3297 // then it takes an extra AGU (Address Generation Unit) cycle. 3298 if ((NumRegs % 2) || !MI.hasOneMemOperand() || 3299 (*MI.memoperands_begin())->getAlignment() < 8) 3300 ++UOps; 3301 return UOps; 3302 } 3303 } 3304 } 3305 } 3306 llvm_unreachable("Didn't find the number of microops"); 3307 } 3308 3309 int 3310 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, 3311 const MCInstrDesc &DefMCID, 3312 unsigned DefClass, 3313 unsigned DefIdx, unsigned DefAlign) const { 3314 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 3315 if (RegNo <= 0) 3316 // Def is the address writeback. 
3317 return ItinData->getOperandCycle(DefClass, DefIdx); 3318 3319 int DefCycle; 3320 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3321 // (regno / 2) + (regno % 2) + 1 3322 DefCycle = RegNo / 2 + 1; 3323 if (RegNo % 2) 3324 ++DefCycle; 3325 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3326 DefCycle = RegNo; 3327 bool isSLoad = false; 3328 3329 switch (DefMCID.getOpcode()) { 3330 default: break; 3331 case ARM::VLDMSIA: 3332 case ARM::VLDMSIA_UPD: 3333 case ARM::VLDMSDB_UPD: 3334 isSLoad = true; 3335 break; 3336 } 3337 3338 // If there are odd number of 'S' registers or if it's not 64-bit aligned, 3339 // then it takes an extra cycle. 3340 if ((isSLoad && (RegNo % 2)) || DefAlign < 8) 3341 ++DefCycle; 3342 } else { 3343 // Assume the worst. 3344 DefCycle = RegNo + 2; 3345 } 3346 3347 return DefCycle; 3348 } 3349 3350 int 3351 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, 3352 const MCInstrDesc &DefMCID, 3353 unsigned DefClass, 3354 unsigned DefIdx, unsigned DefAlign) const { 3355 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 3356 if (RegNo <= 0) 3357 // Def is the address writeback. 3358 return ItinData->getOperandCycle(DefClass, DefIdx); 3359 3360 int DefCycle; 3361 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3362 // 4 registers would be issued: 1, 2, 1. 3363 // 5 registers would be issued: 1, 2, 2. 3364 DefCycle = RegNo / 2; 3365 if (DefCycle < 1) 3366 DefCycle = 1; 3367 // Result latency is issue cycle + 2: E2. 3368 DefCycle += 2; 3369 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3370 DefCycle = (RegNo / 2); 3371 // If there are odd number of registers or if it's not 64-bit aligned, 3372 // then it takes an extra AGU (Address Generation Unit) cycle. 3373 if ((RegNo % 2) || DefAlign < 8) 3374 ++DefCycle; 3375 // Result latency is AGU cycles + 2. 3376 DefCycle += 2; 3377 } else { 3378 // Assume the worst. 3379 DefCycle = RegNo + 2; 3380 } 3381 3382 return DefCycle; 3383 } 3384 3385 int 3386 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 3387 const MCInstrDesc &UseMCID, 3388 unsigned UseClass, 3389 unsigned UseIdx, unsigned UseAlign) const { 3390 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 3391 if (RegNo <= 0) 3392 return ItinData->getOperandCycle(UseClass, UseIdx); 3393 3394 int UseCycle; 3395 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { 3396 // (regno / 2) + (regno % 2) + 1 3397 UseCycle = RegNo / 2 + 1; 3398 if (RegNo % 2) 3399 ++UseCycle; 3400 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { 3401 UseCycle = RegNo; 3402 bool isSStore = false; 3403 3404 switch (UseMCID.getOpcode()) { 3405 default: break; 3406 case ARM::VSTMSIA: 3407 case ARM::VSTMSIA_UPD: 3408 case ARM::VSTMSDB_UPD: 3409 isSStore = true; 3410 break; 3411 } 3412 3413 // If there are odd number of 'S' registers or if it's not 64-bit aligned, 3414 // then it takes an extra cycle. 3415 if ((isSStore && (RegNo % 2)) || UseAlign < 8) 3416 ++UseCycle; 3417 } else { 3418 // Assume the worst. 
3419     UseCycle = RegNo + 2;
3420   }
3421
3422   return UseCycle;
3423 }
3424
3425 int
3426 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3427                                  const MCInstrDesc &UseMCID,
3428                                  unsigned UseClass,
3429                                  unsigned UseIdx, unsigned UseAlign) const {
3430   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3431   if (RegNo <= 0)
3432     return ItinData->getOperandCycle(UseClass, UseIdx);
3433
3434   int UseCycle;
3435   if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3436     UseCycle = RegNo / 2;
3437     if (UseCycle < 2)
3438       UseCycle = 2;
3439     // Read in E3.
3440     UseCycle += 2;
3441   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3442     UseCycle = (RegNo / 2);
3443     // If there is an odd number of registers or if it's not 64-bit aligned,
3444     // then it takes an extra AGU (Address Generation Unit) cycle.
3445     if ((RegNo % 2) || UseAlign < 8)
3446       ++UseCycle;
3447   } else {
3448     // Assume the worst.
3449     UseCycle = 1;
3450   }
3451   return UseCycle;
3452 }
3453
3454 int
3455 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3456                                     const MCInstrDesc &DefMCID,
3457                                     unsigned DefIdx, unsigned DefAlign,
3458                                     const MCInstrDesc &UseMCID,
3459                                     unsigned UseIdx, unsigned UseAlign) const {
3460   unsigned DefClass = DefMCID.getSchedClass();
3461   unsigned UseClass = UseMCID.getSchedClass();
3462
3463   if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3464     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3465
3466   // This may be a def / use of a variable_ops instruction; the operand
3467   // latency might be determinable dynamically. Let the target try to
3468   // figure it out.
3469   int DefCycle = -1;
3470   bool LdmBypass = false;
3471   switch (DefMCID.getOpcode()) {
3472   default:
3473     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3474     break;
3475
3476   case ARM::VLDMDIA:
3477   case ARM::VLDMDIA_UPD:
3478   case ARM::VLDMDDB_UPD:
3479   case ARM::VLDMSIA:
3480   case ARM::VLDMSIA_UPD:
3481   case ARM::VLDMSDB_UPD:
3482     DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3483     break;
3484
3485   case ARM::LDMIA_RET:
3486   case ARM::LDMIA:
3487   case ARM::LDMDA:
3488   case ARM::LDMDB:
3489   case ARM::LDMIB:
3490   case ARM::LDMIA_UPD:
3491   case ARM::LDMDA_UPD:
3492   case ARM::LDMDB_UPD:
3493   case ARM::LDMIB_UPD:
3494   case ARM::tLDMIA:
3495   case ARM::tLDMIA_UPD:
3496   case ARM::tPUSH:
3497   case ARM::t2LDMIA_RET:
3498   case ARM::t2LDMIA:
3499   case ARM::t2LDMDB:
3500   case ARM::t2LDMIA_UPD:
3501   case ARM::t2LDMDB_UPD:
3502     LdmBypass = true;
3503     DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3504     break;
3505   }
3506
3507   if (DefCycle == -1)
3508     // We can't seem to determine the result latency of the def; assume it's 2.
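    // (getOperandCycle reports -1 when the itinerary class says nothing about
    // this operand; a small fixed default is used instead.)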
3509 DefCycle = 2; 3510 3511 int UseCycle = -1; 3512 switch (UseMCID.getOpcode()) { 3513 default: 3514 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); 3515 break; 3516 3517 case ARM::VSTMDIA: 3518 case ARM::VSTMDIA_UPD: 3519 case ARM::VSTMDDB_UPD: 3520 case ARM::VSTMSIA: 3521 case ARM::VSTMSIA_UPD: 3522 case ARM::VSTMSDB_UPD: 3523 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 3524 break; 3525 3526 case ARM::STMIA: 3527 case ARM::STMDA: 3528 case ARM::STMDB: 3529 case ARM::STMIB: 3530 case ARM::STMIA_UPD: 3531 case ARM::STMDA_UPD: 3532 case ARM::STMDB_UPD: 3533 case ARM::STMIB_UPD: 3534 case ARM::tSTMIA_UPD: 3535 case ARM::tPOP_RET: 3536 case ARM::tPOP: 3537 case ARM::t2STMIA: 3538 case ARM::t2STMDB: 3539 case ARM::t2STMIA_UPD: 3540 case ARM::t2STMDB_UPD: 3541 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 3542 break; 3543 } 3544 3545 if (UseCycle == -1) 3546 // Assume it's read in the first stage. 3547 UseCycle = 1; 3548 3549 UseCycle = DefCycle - UseCycle + 1; 3550 if (UseCycle > 0) { 3551 if (LdmBypass) { 3552 // It's a variable_ops instruction so we can't use DefIdx here. Just use 3553 // first def operand. 3554 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, 3555 UseClass, UseIdx)) 3556 --UseCycle; 3557 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, 3558 UseClass, UseIdx)) { 3559 --UseCycle; 3560 } 3561 } 3562 3563 return UseCycle; 3564 } 3565 3566 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, 3567 const MachineInstr *MI, unsigned Reg, 3568 unsigned &DefIdx, unsigned &Dist) { 3569 Dist = 0; 3570 3571 MachineBasicBlock::const_iterator I = MI; ++I; 3572 MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); 3573 assert(II->isInsideBundle() && "Empty bundle?"); 3574 3575 int Idx = -1; 3576 while (II->isInsideBundle()) { 3577 Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); 3578 if (Idx != -1) 3579 break; 3580 --II; 3581 ++Dist; 3582 } 3583 3584 assert(Idx != -1 && "Cannot find bundled definition!"); 3585 DefIdx = Idx; 3586 return &*II; 3587 } 3588 3589 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, 3590 const MachineInstr &MI, unsigned Reg, 3591 unsigned &UseIdx, unsigned &Dist) { 3592 Dist = 0; 3593 3594 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); 3595 assert(II->isInsideBundle() && "Empty bundle?"); 3596 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 3597 3598 // FIXME: This doesn't properly handle multiple uses. 3599 int Idx = -1; 3600 while (II != E && II->isInsideBundle()) { 3601 Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); 3602 if (Idx != -1) 3603 break; 3604 if (II->getOpcode() != ARM::t2IT) 3605 ++Dist; 3606 ++II; 3607 } 3608 3609 if (Idx == -1) { 3610 Dist = 0; 3611 return nullptr; 3612 } 3613 3614 UseIdx = Idx; 3615 return &*II; 3616 } 3617 3618 /// Return the number of cycles to add to (or subtract from) the static 3619 /// itinerary based on the def opcode and alignment. The caller will ensure that 3620 /// adjusted latency is at least one cycle. 3621 static int adjustDefLatency(const ARMSubtarget &Subtarget, 3622 const MachineInstr &DefMI, 3623 const MCInstrDesc &DefMCID, unsigned DefAlign) { 3624 int Adjust = 0; 3625 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { 3626 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 3627 // variants are one cycle cheaper. 
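    // For example, on these cores "ldr r0, [r1, r2]" and
    // "ldr r0, [r1, r2, lsl #2]" are treated as one cycle cheaper than the
    // other shifted addressing forms, hence the --Adjust in the cases below.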
3628 switch (DefMCID.getOpcode()) { 3629 default: break; 3630 case ARM::LDRrs: 3631 case ARM::LDRBrs: { 3632 unsigned ShOpVal = DefMI.getOperand(3).getImm(); 3633 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3634 if (ShImm == 0 || 3635 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 3636 --Adjust; 3637 break; 3638 } 3639 case ARM::t2LDRs: 3640 case ARM::t2LDRBs: 3641 case ARM::t2LDRHs: 3642 case ARM::t2LDRSHs: { 3643 // Thumb2 mode: lsl only. 3644 unsigned ShAmt = DefMI.getOperand(3).getImm(); 3645 if (ShAmt == 0 || ShAmt == 2) 3646 --Adjust; 3647 break; 3648 } 3649 } 3650 } else if (Subtarget.isSwift()) { 3651 // FIXME: Properly handle all of the latency adjustments for address 3652 // writeback. 3653 switch (DefMCID.getOpcode()) { 3654 default: break; 3655 case ARM::LDRrs: 3656 case ARM::LDRBrs: { 3657 unsigned ShOpVal = DefMI.getOperand(3).getImm(); 3658 bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; 3659 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3660 if (!isSub && 3661 (ShImm == 0 || 3662 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3663 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) 3664 Adjust -= 2; 3665 else if (!isSub && 3666 ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 3667 --Adjust; 3668 break; 3669 } 3670 case ARM::t2LDRs: 3671 case ARM::t2LDRBs: 3672 case ARM::t2LDRHs: 3673 case ARM::t2LDRSHs: { 3674 // Thumb2 mode: lsl only. 3675 unsigned ShAmt = DefMI.getOperand(3).getImm(); 3676 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) 3677 Adjust -= 2; 3678 break; 3679 } 3680 } 3681 } 3682 3683 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { 3684 switch (DefMCID.getOpcode()) { 3685 default: break; 3686 case ARM::VLD1q8: 3687 case ARM::VLD1q16: 3688 case ARM::VLD1q32: 3689 case ARM::VLD1q64: 3690 case ARM::VLD1q8wb_fixed: 3691 case ARM::VLD1q16wb_fixed: 3692 case ARM::VLD1q32wb_fixed: 3693 case ARM::VLD1q64wb_fixed: 3694 case ARM::VLD1q8wb_register: 3695 case ARM::VLD1q16wb_register: 3696 case ARM::VLD1q32wb_register: 3697 case ARM::VLD1q64wb_register: 3698 case ARM::VLD2d8: 3699 case ARM::VLD2d16: 3700 case ARM::VLD2d32: 3701 case ARM::VLD2q8: 3702 case ARM::VLD2q16: 3703 case ARM::VLD2q32: 3704 case ARM::VLD2d8wb_fixed: 3705 case ARM::VLD2d16wb_fixed: 3706 case ARM::VLD2d32wb_fixed: 3707 case ARM::VLD2q8wb_fixed: 3708 case ARM::VLD2q16wb_fixed: 3709 case ARM::VLD2q32wb_fixed: 3710 case ARM::VLD2d8wb_register: 3711 case ARM::VLD2d16wb_register: 3712 case ARM::VLD2d32wb_register: 3713 case ARM::VLD2q8wb_register: 3714 case ARM::VLD2q16wb_register: 3715 case ARM::VLD2q32wb_register: 3716 case ARM::VLD3d8: 3717 case ARM::VLD3d16: 3718 case ARM::VLD3d32: 3719 case ARM::VLD1d64T: 3720 case ARM::VLD3d8_UPD: 3721 case ARM::VLD3d16_UPD: 3722 case ARM::VLD3d32_UPD: 3723 case ARM::VLD1d64Twb_fixed: 3724 case ARM::VLD1d64Twb_register: 3725 case ARM::VLD3q8_UPD: 3726 case ARM::VLD3q16_UPD: 3727 case ARM::VLD3q32_UPD: 3728 case ARM::VLD4d8: 3729 case ARM::VLD4d16: 3730 case ARM::VLD4d32: 3731 case ARM::VLD1d64Q: 3732 case ARM::VLD4d8_UPD: 3733 case ARM::VLD4d16_UPD: 3734 case ARM::VLD4d32_UPD: 3735 case ARM::VLD1d64Qwb_fixed: 3736 case ARM::VLD1d64Qwb_register: 3737 case ARM::VLD4q8_UPD: 3738 case ARM::VLD4q16_UPD: 3739 case ARM::VLD4q32_UPD: 3740 case ARM::VLD1DUPq8: 3741 case ARM::VLD1DUPq16: 3742 case ARM::VLD1DUPq32: 3743 case ARM::VLD1DUPq8wb_fixed: 3744 case ARM::VLD1DUPq16wb_fixed: 3745 case ARM::VLD1DUPq32wb_fixed: 3746 case ARM::VLD1DUPq8wb_register: 3747 case ARM::VLD1DUPq16wb_register: 3748 case 
ARM::VLD1DUPq32wb_register: 3749 case ARM::VLD2DUPd8: 3750 case ARM::VLD2DUPd16: 3751 case ARM::VLD2DUPd32: 3752 case ARM::VLD2DUPd8wb_fixed: 3753 case ARM::VLD2DUPd16wb_fixed: 3754 case ARM::VLD2DUPd32wb_fixed: 3755 case ARM::VLD2DUPd8wb_register: 3756 case ARM::VLD2DUPd16wb_register: 3757 case ARM::VLD2DUPd32wb_register: 3758 case ARM::VLD4DUPd8: 3759 case ARM::VLD4DUPd16: 3760 case ARM::VLD4DUPd32: 3761 case ARM::VLD4DUPd8_UPD: 3762 case ARM::VLD4DUPd16_UPD: 3763 case ARM::VLD4DUPd32_UPD: 3764 case ARM::VLD1LNd8: 3765 case ARM::VLD1LNd16: 3766 case ARM::VLD1LNd32: 3767 case ARM::VLD1LNd8_UPD: 3768 case ARM::VLD1LNd16_UPD: 3769 case ARM::VLD1LNd32_UPD: 3770 case ARM::VLD2LNd8: 3771 case ARM::VLD2LNd16: 3772 case ARM::VLD2LNd32: 3773 case ARM::VLD2LNq16: 3774 case ARM::VLD2LNq32: 3775 case ARM::VLD2LNd8_UPD: 3776 case ARM::VLD2LNd16_UPD: 3777 case ARM::VLD2LNd32_UPD: 3778 case ARM::VLD2LNq16_UPD: 3779 case ARM::VLD2LNq32_UPD: 3780 case ARM::VLD4LNd8: 3781 case ARM::VLD4LNd16: 3782 case ARM::VLD4LNd32: 3783 case ARM::VLD4LNq16: 3784 case ARM::VLD4LNq32: 3785 case ARM::VLD4LNd8_UPD: 3786 case ARM::VLD4LNd16_UPD: 3787 case ARM::VLD4LNd32_UPD: 3788 case ARM::VLD4LNq16_UPD: 3789 case ARM::VLD4LNq32_UPD: 3790 // If the address is not 64-bit aligned, the latencies of these 3791 // instructions increases by one. 3792 ++Adjust; 3793 break; 3794 } 3795 } 3796 return Adjust; 3797 } 3798 3799 int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 3800 const MachineInstr &DefMI, 3801 unsigned DefIdx, 3802 const MachineInstr &UseMI, 3803 unsigned UseIdx) const { 3804 // No operand latency. The caller may fall back to getInstrLatency. 3805 if (!ItinData || ItinData->isEmpty()) 3806 return -1; 3807 3808 const MachineOperand &DefMO = DefMI.getOperand(DefIdx); 3809 unsigned Reg = DefMO.getReg(); 3810 3811 const MachineInstr *ResolvedDefMI = &DefMI; 3812 unsigned DefAdj = 0; 3813 if (DefMI.isBundle()) 3814 ResolvedDefMI = 3815 getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj); 3816 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || 3817 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { 3818 return 1; 3819 } 3820 3821 const MachineInstr *ResolvedUseMI = &UseMI; 3822 unsigned UseAdj = 0; 3823 if (UseMI.isBundle()) { 3824 ResolvedUseMI = 3825 getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj); 3826 if (!ResolvedUseMI) 3827 return -1; 3828 } 3829 3830 return getOperandLatencyImpl( 3831 ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO, 3832 Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj); 3833 } 3834 3835 int ARMBaseInstrInfo::getOperandLatencyImpl( 3836 const InstrItineraryData *ItinData, const MachineInstr &DefMI, 3837 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, 3838 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, 3839 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { 3840 if (Reg == ARM::CPSR) { 3841 if (DefMI.getOpcode() == ARM::FMSTAT) { 3842 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) 3843 return Subtarget.isLikeA9() ? 1 : 20; 3844 } 3845 3846 // CPSR set and branch can be paired in the same cycle. 3847 if (UseMI.isBranch()) 3848 return 0; 3849 3850 // Otherwise it takes the instruction latency (generally one). 3851 unsigned Latency = getInstrLatency(ItinData, DefMI); 3852 3853 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to 3854 // its uses. 
Instructions which are otherwise scheduled between them may 3855 // incur a code size penalty (not able to use the CPSR setting 16-bit 3856 // instructions). 3857 if (Latency > 0 && Subtarget.isThumb2()) { 3858 const MachineFunction *MF = DefMI.getParent()->getParent(); 3859 // FIXME: Use Function::optForSize(). 3860 if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) 3861 --Latency; 3862 } 3863 return Latency; 3864 } 3865 3866 if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit()) 3867 return -1; 3868 3869 unsigned DefAlign = DefMI.hasOneMemOperand() 3870 ? (*DefMI.memoperands_begin())->getAlignment() 3871 : 0; 3872 unsigned UseAlign = UseMI.hasOneMemOperand() 3873 ? (*UseMI.memoperands_begin())->getAlignment() 3874 : 0; 3875 3876 // Get the itinerary's latency if possible, and handle variable_ops. 3877 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID, 3878 UseIdx, UseAlign); 3879 // Unable to find operand latency. The caller may resort to getInstrLatency. 3880 if (Latency < 0) 3881 return Latency; 3882 3883 // Adjust for IT block position. 3884 int Adj = DefAdj + UseAdj; 3885 3886 // Adjust for dynamic def-side opcode variants not captured by the itinerary. 3887 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); 3888 if (Adj >= 0 || (int)Latency > -Adj) { 3889 return Latency + Adj; 3890 } 3891 // Return the itinerary latency, which may be zero but not less than zero. 3892 return Latency; 3893 } 3894 3895 int 3896 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 3897 SDNode *DefNode, unsigned DefIdx, 3898 SDNode *UseNode, unsigned UseIdx) const { 3899 if (!DefNode->isMachineOpcode()) 3900 return 1; 3901 3902 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 3903 3904 if (isZeroCost(DefMCID.Opcode)) 3905 return 0; 3906 3907 if (!ItinData || ItinData->isEmpty()) 3908 return DefMCID.mayLoad() ? 3 : 1; 3909 3910 if (!UseNode->isMachineOpcode()) { 3911 int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 3912 int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); 3913 int Threshold = 1 + Adj; 3914 return Latency <= Threshold ? 1 : Latency - Adj; 3915 } 3916 3917 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 3918 const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); 3919 unsigned DefAlign = !DefMN->memoperands_empty() 3920 ? (*DefMN->memoperands_begin())->getAlignment() : 0; 3921 const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); 3922 unsigned UseAlign = !UseMN->memoperands_empty() 3923 ? (*UseMN->memoperands_begin())->getAlignment() : 0; 3924 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 3925 UseMCID, UseIdx, UseAlign); 3926 3927 if (Latency > 1 && 3928 (Subtarget.isCortexA8() || Subtarget.isLikeA9() || 3929 Subtarget.isCortexA7())) { 3930 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 3931 // variants are one cycle cheaper. 3932 switch (DefMCID.getOpcode()) { 3933 default: break; 3934 case ARM::LDRrs: 3935 case ARM::LDRBrs: { 3936 unsigned ShOpVal = 3937 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 3938 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3939 if (ShImm == 0 || 3940 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 3941 --Latency; 3942 break; 3943 } 3944 case ARM::t2LDRs: 3945 case ARM::t2LDRBs: 3946 case ARM::t2LDRHs: 3947 case ARM::t2LDRSHs: { 3948 // Thumb2 mode: lsl only. 
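      // Same shifter-op discount as the MachineInstr-based path above, except
      // the shift amount comes from operand 2 of the selected node.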
3949 unsigned ShAmt = 3950 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 3951 if (ShAmt == 0 || ShAmt == 2) 3952 --Latency; 3953 break; 3954 } 3955 } 3956 } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { 3957 // FIXME: Properly handle all of the latency adjustments for address 3958 // writeback. 3959 switch (DefMCID.getOpcode()) { 3960 default: break; 3961 case ARM::LDRrs: 3962 case ARM::LDRBrs: { 3963 unsigned ShOpVal = 3964 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 3965 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 3966 if (ShImm == 0 || 3967 ((ShImm == 1 || ShImm == 2 || ShImm == 3) && 3968 ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 3969 Latency -= 2; 3970 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) 3971 --Latency; 3972 break; 3973 } 3974 case ARM::t2LDRs: 3975 case ARM::t2LDRBs: 3976 case ARM::t2LDRHs: 3977 case ARM::t2LDRSHs: 3978 // Thumb2 mode: lsl 0-3 only. 3979 Latency -= 2; 3980 break; 3981 } 3982 } 3983 3984 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) 3985 switch (DefMCID.getOpcode()) { 3986 default: break; 3987 case ARM::VLD1q8: 3988 case ARM::VLD1q16: 3989 case ARM::VLD1q32: 3990 case ARM::VLD1q64: 3991 case ARM::VLD1q8wb_register: 3992 case ARM::VLD1q16wb_register: 3993 case ARM::VLD1q32wb_register: 3994 case ARM::VLD1q64wb_register: 3995 case ARM::VLD1q8wb_fixed: 3996 case ARM::VLD1q16wb_fixed: 3997 case ARM::VLD1q32wb_fixed: 3998 case ARM::VLD1q64wb_fixed: 3999 case ARM::VLD2d8: 4000 case ARM::VLD2d16: 4001 case ARM::VLD2d32: 4002 case ARM::VLD2q8Pseudo: 4003 case ARM::VLD2q16Pseudo: 4004 case ARM::VLD2q32Pseudo: 4005 case ARM::VLD2d8wb_fixed: 4006 case ARM::VLD2d16wb_fixed: 4007 case ARM::VLD2d32wb_fixed: 4008 case ARM::VLD2q8PseudoWB_fixed: 4009 case ARM::VLD2q16PseudoWB_fixed: 4010 case ARM::VLD2q32PseudoWB_fixed: 4011 case ARM::VLD2d8wb_register: 4012 case ARM::VLD2d16wb_register: 4013 case ARM::VLD2d32wb_register: 4014 case ARM::VLD2q8PseudoWB_register: 4015 case ARM::VLD2q16PseudoWB_register: 4016 case ARM::VLD2q32PseudoWB_register: 4017 case ARM::VLD3d8Pseudo: 4018 case ARM::VLD3d16Pseudo: 4019 case ARM::VLD3d32Pseudo: 4020 case ARM::VLD1d64TPseudo: 4021 case ARM::VLD1d64TPseudoWB_fixed: 4022 case ARM::VLD3d8Pseudo_UPD: 4023 case ARM::VLD3d16Pseudo_UPD: 4024 case ARM::VLD3d32Pseudo_UPD: 4025 case ARM::VLD3q8Pseudo_UPD: 4026 case ARM::VLD3q16Pseudo_UPD: 4027 case ARM::VLD3q32Pseudo_UPD: 4028 case ARM::VLD3q8oddPseudo: 4029 case ARM::VLD3q16oddPseudo: 4030 case ARM::VLD3q32oddPseudo: 4031 case ARM::VLD3q8oddPseudo_UPD: 4032 case ARM::VLD3q16oddPseudo_UPD: 4033 case ARM::VLD3q32oddPseudo_UPD: 4034 case ARM::VLD4d8Pseudo: 4035 case ARM::VLD4d16Pseudo: 4036 case ARM::VLD4d32Pseudo: 4037 case ARM::VLD1d64QPseudo: 4038 case ARM::VLD1d64QPseudoWB_fixed: 4039 case ARM::VLD4d8Pseudo_UPD: 4040 case ARM::VLD4d16Pseudo_UPD: 4041 case ARM::VLD4d32Pseudo_UPD: 4042 case ARM::VLD4q8Pseudo_UPD: 4043 case ARM::VLD4q16Pseudo_UPD: 4044 case ARM::VLD4q32Pseudo_UPD: 4045 case ARM::VLD4q8oddPseudo: 4046 case ARM::VLD4q16oddPseudo: 4047 case ARM::VLD4q32oddPseudo: 4048 case ARM::VLD4q8oddPseudo_UPD: 4049 case ARM::VLD4q16oddPseudo_UPD: 4050 case ARM::VLD4q32oddPseudo_UPD: 4051 case ARM::VLD1DUPq8: 4052 case ARM::VLD1DUPq16: 4053 case ARM::VLD1DUPq32: 4054 case ARM::VLD1DUPq8wb_fixed: 4055 case ARM::VLD1DUPq16wb_fixed: 4056 case ARM::VLD1DUPq32wb_fixed: 4057 case ARM::VLD1DUPq8wb_register: 4058 case ARM::VLD1DUPq16wb_register: 4059 case ARM::VLD1DUPq32wb_register: 4060 case ARM::VLD2DUPd8: 4061 
case ARM::VLD2DUPd16: 4062 case ARM::VLD2DUPd32: 4063 case ARM::VLD2DUPd8wb_fixed: 4064 case ARM::VLD2DUPd16wb_fixed: 4065 case ARM::VLD2DUPd32wb_fixed: 4066 case ARM::VLD2DUPd8wb_register: 4067 case ARM::VLD2DUPd16wb_register: 4068 case ARM::VLD2DUPd32wb_register: 4069 case ARM::VLD4DUPd8Pseudo: 4070 case ARM::VLD4DUPd16Pseudo: 4071 case ARM::VLD4DUPd32Pseudo: 4072 case ARM::VLD4DUPd8Pseudo_UPD: 4073 case ARM::VLD4DUPd16Pseudo_UPD: 4074 case ARM::VLD4DUPd32Pseudo_UPD: 4075 case ARM::VLD1LNq8Pseudo: 4076 case ARM::VLD1LNq16Pseudo: 4077 case ARM::VLD1LNq32Pseudo: 4078 case ARM::VLD1LNq8Pseudo_UPD: 4079 case ARM::VLD1LNq16Pseudo_UPD: 4080 case ARM::VLD1LNq32Pseudo_UPD: 4081 case ARM::VLD2LNd8Pseudo: 4082 case ARM::VLD2LNd16Pseudo: 4083 case ARM::VLD2LNd32Pseudo: 4084 case ARM::VLD2LNq16Pseudo: 4085 case ARM::VLD2LNq32Pseudo: 4086 case ARM::VLD2LNd8Pseudo_UPD: 4087 case ARM::VLD2LNd16Pseudo_UPD: 4088 case ARM::VLD2LNd32Pseudo_UPD: 4089 case ARM::VLD2LNq16Pseudo_UPD: 4090 case ARM::VLD2LNq32Pseudo_UPD: 4091 case ARM::VLD4LNd8Pseudo: 4092 case ARM::VLD4LNd16Pseudo: 4093 case ARM::VLD4LNd32Pseudo: 4094 case ARM::VLD4LNq16Pseudo: 4095 case ARM::VLD4LNq32Pseudo: 4096 case ARM::VLD4LNd8Pseudo_UPD: 4097 case ARM::VLD4LNd16Pseudo_UPD: 4098 case ARM::VLD4LNd32Pseudo_UPD: 4099 case ARM::VLD4LNq16Pseudo_UPD: 4100 case ARM::VLD4LNq32Pseudo_UPD: 4101 // If the address is not 64-bit aligned, the latencies of these 4102 // instructions increases by one. 4103 ++Latency; 4104 break; 4105 } 4106 4107 return Latency; 4108 } 4109 4110 unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { 4111 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || 4112 MI.isImplicitDef()) 4113 return 0; 4114 4115 if (MI.isBundle()) 4116 return 0; 4117 4118 const MCInstrDesc &MCID = MI.getDesc(); 4119 4120 if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { 4121 // When predicated, CPSR is an additional source operand for CPSR updating 4122 // instructions, this apparently increases their latencies. 4123 return 1; 4124 } 4125 return 0; 4126 } 4127 4128 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 4129 const MachineInstr &MI, 4130 unsigned *PredCost) const { 4131 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || 4132 MI.isImplicitDef()) 4133 return 1; 4134 4135 // An instruction scheduler typically runs on unbundled instructions, however 4136 // other passes may query the latency of a bundled instruction. 4137 if (MI.isBundle()) { 4138 unsigned Latency = 0; 4139 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 4140 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 4141 while (++I != E && I->isInsideBundle()) { 4142 if (I->getOpcode() != ARM::t2IT) 4143 Latency += getInstrLatency(ItinData, *I, PredCost); 4144 } 4145 return Latency; 4146 } 4147 4148 const MCInstrDesc &MCID = MI.getDesc(); 4149 if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { 4150 // When predicated, CPSR is an additional source operand for CPSR updating 4151 // instructions, this apparently increases their latencies. 4152 *PredCost = 1; 4153 } 4154 // Be sure to call getStageLatency for an empty itinerary in case it has a 4155 // valid MinLatency property. 4156 if (!ItinData) 4157 return MI.mayLoad() ? 3 : 1; 4158 4159 unsigned Class = MCID.getSchedClass(); 4160 4161 // For instructions with variable uops, use uops as latency. 
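  // (A negative micro-op count is the itinerary's "variable" marker; the
  // load / store multiple classes handled in getNumMicroOps above use it.)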
4162 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) 4163 return getNumMicroOps(ItinData, MI); 4164 4165 // For the common case, fall back on the itinerary's latency. 4166 unsigned Latency = ItinData->getStageLatency(Class); 4167 4168 // Adjust for dynamic def-side opcode variants not captured by the itinerary. 4169 unsigned DefAlign = 4170 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0; 4171 int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign); 4172 if (Adj >= 0 || (int)Latency > -Adj) { 4173 return Latency + Adj; 4174 } 4175 return Latency; 4176 } 4177 4178 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 4179 SDNode *Node) const { 4180 if (!Node->isMachineOpcode()) 4181 return 1; 4182 4183 if (!ItinData || ItinData->isEmpty()) 4184 return 1; 4185 4186 unsigned Opcode = Node->getMachineOpcode(); 4187 switch (Opcode) { 4188 default: 4189 return ItinData->getStageLatency(get(Opcode).getSchedClass()); 4190 case ARM::VLDMQIA: 4191 case ARM::VSTMQIA: 4192 return 2; 4193 } 4194 } 4195 4196 bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, 4197 const MachineRegisterInfo *MRI, 4198 const MachineInstr &DefMI, 4199 unsigned DefIdx, 4200 const MachineInstr &UseMI, 4201 unsigned UseIdx) const { 4202 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; 4203 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; 4204 if (Subtarget.nonpipelinedVFP() && 4205 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 4206 return true; 4207 4208 // Hoist VFP / NEON instructions with 4 or higher latency. 4209 unsigned Latency = 4210 SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx); 4211 if (Latency <= 3) 4212 return false; 4213 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 4214 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 4215 } 4216 4217 bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, 4218 const MachineInstr &DefMI, 4219 unsigned DefIdx) const { 4220 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); 4221 if (!ItinData || ItinData->isEmpty()) 4222 return false; 4223 4224 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; 4225 if (DDomain == ARMII::DomainGeneral) { 4226 unsigned DefClass = DefMI.getDesc().getSchedClass(); 4227 int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 4228 return (DefCycle != -1 && DefCycle <= 2); 4229 } 4230 return false; 4231 } 4232 4233 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, 4234 StringRef &ErrInfo) const { 4235 if (convertAddSubFlagsOpcode(MI.getOpcode())) { 4236 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 4237 return false; 4238 } 4239 return true; 4240 } 4241 4242 // LoadStackGuard has so far only been implemented for MachO. Different code 4243 // sequence is needed for other targets. 
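// Roughly, the expansion below produces:
//   Reg = <LoadImmOpc> &stack-guard-global   ; materialize the guard's address
//   Reg = <LoadOpc> [Reg]                    ; extra hop when the global is only
//                                            ; reachable through a non-lazy pointer
//   Reg = <LoadOpc> [Reg]                    ; load the guard value itself
// with LoadImmOpc / LoadOpc chosen by the caller for the specific subtarget.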
4244 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, 4245 unsigned LoadImmOpc, 4246 unsigned LoadOpc) const { 4247 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && 4248 "ROPI/RWPI not currently supported with stack guard"); 4249 4250 MachineBasicBlock &MBB = *MI->getParent(); 4251 DebugLoc DL = MI->getDebugLoc(); 4252 unsigned Reg = MI->getOperand(0).getReg(); 4253 const GlobalValue *GV = 4254 cast<GlobalValue>((*MI->memoperands_begin())->getValue()); 4255 MachineInstrBuilder MIB; 4256 4257 BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) 4258 .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); 4259 4260 if (Subtarget.isGVIndirectSymbol(GV)) { 4261 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 4262 MIB.addReg(Reg, RegState::Kill).addImm(0); 4263 auto Flags = MachineMemOperand::MOLoad | 4264 MachineMemOperand::MODereferenceable | 4265 MachineMemOperand::MOInvariant; 4266 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( 4267 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); 4268 MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); 4269 } 4270 4271 MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); 4272 MIB.addReg(Reg, RegState::Kill) 4273 .addImm(0) 4274 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) 4275 .add(predOps(ARMCC::AL)); 4276 } 4277 4278 bool 4279 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 4280 unsigned &AddSubOpc, 4281 bool &NegAcc, bool &HasLane) const { 4282 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 4283 if (I == MLxEntryMap.end()) 4284 return false; 4285 4286 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 4287 MulOpc = Entry.MulOpc; 4288 AddSubOpc = Entry.AddSubOpc; 4289 NegAcc = Entry.NegAcc; 4290 HasLane = Entry.HasLane; 4291 return true; 4292 } 4293 4294 //===----------------------------------------------------------------------===// 4295 // Execution domains. 4296 //===----------------------------------------------------------------------===// 4297 // 4298 // Some instructions go down the NEON pipeline, some go down the VFP pipeline, 4299 // and some can go down both. The vmov instructions go down the VFP pipeline, 4300 // but they can be changed to vorr equivalents that are executed by the NEON 4301 // pipeline. 4302 // 4303 // We use the following execution domain numbering: 4304 // 4305 enum ARMExeDomain { 4306 ExeGeneric = 0, 4307 ExeVFP = 1, 4308 ExeNEON = 2 4309 }; 4310 4311 // 4312 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h 4313 // 4314 std::pair<uint16_t, uint16_t> 4315 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { 4316 // If we don't have access to NEON instructions then we won't be able 4317 // to swizzle anything to the NEON domain. Check to make sure. 4318 if (Subtarget.hasNEON()) { 4319 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON 4320 // if they are not predicated. 4321 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) 4322 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); 4323 4324 // CortexA9 is particularly picky about mixing the two and wants these 4325 // converted. 4326 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && 4327 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || 4328 MI.getOpcode() == ARM::VMOVS)) 4329 return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON)); 4330 } 4331 // No other instructions can be swizzled, so just determine their domain. 
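  // The second element of the returned pair is the mask of domains the
  // instruction could legally be moved to; the cases below return 0 because
  // no alternative encoding is available.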
4332 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; 4333 4334 if (Domain & ARMII::DomainNEON) 4335 return std::make_pair(ExeNEON, 0); 4336 4337 // Certain instructions can go either way on Cortex-A8. 4338 // Treat them as NEON instructions. 4339 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) 4340 return std::make_pair(ExeNEON, 0); 4341 4342 if (Domain & ARMII::DomainVFP) 4343 return std::make_pair(ExeVFP, 0); 4344 4345 return std::make_pair(ExeGeneric, 0); 4346 } 4347 4348 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, 4349 unsigned SReg, unsigned &Lane) { 4350 unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); 4351 Lane = 0; 4352 4353 if (DReg != ARM::NoRegister) 4354 return DReg; 4355 4356 Lane = 1; 4357 DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); 4358 4359 assert(DReg && "S-register with no D super-register?"); 4360 return DReg; 4361 } 4362 4363 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, 4364 /// set ImplicitSReg to a register number that must be marked as implicit-use or 4365 /// zero if no register needs to be defined as implicit-use. 4366 /// 4367 /// If the function cannot determine if an SPR should be marked implicit use or 4368 /// not, it returns false. 4369 /// 4370 /// This function handles cases where an instruction is being modified from taking 4371 /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict 4372 /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other 4373 /// lane of the DPR). 4374 /// 4375 /// If the other SPR is defined, an implicit-use of it should be added. Else, 4376 /// (including the case where the DPR itself is defined), it should not. 4377 /// 4378 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, 4379 MachineInstr &MI, unsigned DReg, 4380 unsigned Lane, unsigned &ImplicitSReg) { 4381 // If the DPR is defined or used already, the other SPR lane will be chained 4382 // correctly, so there is nothing to be done. 4383 if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) { 4384 ImplicitSReg = 0; 4385 return true; 4386 } 4387 4388 // Otherwise we need to go searching to see if the SPR is set explicitly. 4389 ImplicitSReg = TRI->getSubReg(DReg, 4390 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); 4391 MachineBasicBlock::LivenessQueryResult LQR = 4392 MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); 4393 4394 if (LQR == MachineBasicBlock::LQR_Live) 4395 return true; 4396 else if (LQR == MachineBasicBlock::LQR_Unknown) 4397 return false; 4398 4399 // If the register is known not to be live, there is no need to add an 4400 // implicit-use. 4401 ImplicitSReg = 0; 4402 return true; 4403 } 4404 4405 void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, 4406 unsigned Domain) const { 4407 unsigned DstReg, SrcReg, DReg; 4408 unsigned Lane; 4409 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); 4410 const TargetRegisterInfo *TRI = &getRegisterInfo(); 4411 switch (MI.getOpcode()) { 4412 default: 4413 llvm_unreachable("cannot handle opcode!"); 4414 break; 4415 case ARM::VMOVD: 4416 if (Domain != ExeNEON) 4417 break; 4418 4419 // Zap the predicate operands. 4420 assert(!isPredicated(MI) && "Cannot predicate a VORRd"); 4421 4422 // Make sure we've got NEON instructions. 
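  // The conversion keeps the same MachineInstr: the explicit operands are
  // dropped, the descriptor is switched to VORRd, and the operand list is
  // rebuilt as "dDst = vorr dSrc, dSrc" with an always-true predicate.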
4423 assert(Subtarget.hasNEON() && "VORRd requires NEON"); 4424 4425 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) 4426 DstReg = MI.getOperand(0).getReg(); 4427 SrcReg = MI.getOperand(1).getReg(); 4428 4429 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 4430 MI.RemoveOperand(i - 1); 4431 4432 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) 4433 MI.setDesc(get(ARM::VORRd)); 4434 MIB.addReg(DstReg, RegState::Define) 4435 .addReg(SrcReg) 4436 .addReg(SrcReg) 4437 .add(predOps(ARMCC::AL)); 4438 break; 4439 case ARM::VMOVRS: 4440 if (Domain != ExeNEON) 4441 break; 4442 assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); 4443 4444 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) 4445 DstReg = MI.getOperand(0).getReg(); 4446 SrcReg = MI.getOperand(1).getReg(); 4447 4448 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 4449 MI.RemoveOperand(i - 1); 4450 4451 DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); 4452 4453 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) 4454 // Note that DSrc has been widened and the other lane may be undef, which 4455 // contaminates the entire register. 4456 MI.setDesc(get(ARM::VGETLNi32)); 4457 MIB.addReg(DstReg, RegState::Define) 4458 .addReg(DReg, RegState::Undef) 4459 .addImm(Lane) 4460 .add(predOps(ARMCC::AL)); 4461 4462 // The old source should be an implicit use, otherwise we might think it 4463 // was dead before here. 4464 MIB.addReg(SrcReg, RegState::Implicit); 4465 break; 4466 case ARM::VMOVSR: { 4467 if (Domain != ExeNEON) 4468 break; 4469 assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); 4470 4471 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) 4472 DstReg = MI.getOperand(0).getReg(); 4473 SrcReg = MI.getOperand(1).getReg(); 4474 4475 DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); 4476 4477 unsigned ImplicitSReg; 4478 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) 4479 break; 4480 4481 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 4482 MI.RemoveOperand(i - 1); 4483 4484 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) 4485 // Again DDst may be undefined at the beginning of this instruction. 4486 MI.setDesc(get(ARM::VSETLNi32)); 4487 MIB.addReg(DReg, RegState::Define) 4488 .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) 4489 .addReg(SrcReg) 4490 .addImm(Lane) 4491 .add(predOps(ARMCC::AL)); 4492 4493 // The narrower destination must be marked as set to keep previous chains 4494 // in place. 
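      // (An implicit def of the original S-register keeps liveness and
      // dependency tracking aware that the old SPR value has been clobbered.)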
4495 MIB.addReg(DstReg, RegState::Define | RegState::Implicit); 4496 if (ImplicitSReg != 0) 4497 MIB.addReg(ImplicitSReg, RegState::Implicit); 4498 break; 4499 } 4500 case ARM::VMOVS: { 4501 if (Domain != ExeNEON) 4502 break; 4503 4504 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) 4505 DstReg = MI.getOperand(0).getReg(); 4506 SrcReg = MI.getOperand(1).getReg(); 4507 4508 unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; 4509 DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); 4510 DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); 4511 4512 unsigned ImplicitSReg; 4513 if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) 4514 break; 4515 4516 for (unsigned i = MI.getDesc().getNumOperands(); i; --i) 4517 MI.RemoveOperand(i - 1); 4518 4519 if (DSrc == DDst) { 4520 // Destination can be: 4521 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) 4522 MI.setDesc(get(ARM::VDUPLN32d)); 4523 MIB.addReg(DDst, RegState::Define) 4524 .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) 4525 .addImm(SrcLane) 4526 .add(predOps(ARMCC::AL)); 4527 4528 // Neither the source or the destination are naturally represented any 4529 // more, so add them in manually. 4530 MIB.addReg(DstReg, RegState::Implicit | RegState::Define); 4531 MIB.addReg(SrcReg, RegState::Implicit); 4532 if (ImplicitSReg != 0) 4533 MIB.addReg(ImplicitSReg, RegState::Implicit); 4534 break; 4535 } 4536 4537 // In general there's no single instruction that can perform an S <-> S 4538 // move in NEON space, but a pair of VEXT instructions *can* do the 4539 // job. It turns out that the VEXTs needed will only use DSrc once, with 4540 // the position based purely on the combination of lane-0 and lane-1 4541 // involved. For example 4542 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 4543 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 4544 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 4545 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 4546 // 4547 // Pattern of the MachineInstrs is: 4548 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) 4549 MachineInstrBuilder NewMIB; 4550 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), 4551 DDst); 4552 4553 // On the first instruction, both DSrc and DDst may be <undef> if present. 4554 // Specifically when the original instruction didn't have them as an 4555 // <imp-use>. 4556 unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; 4557 bool CurUndef = !MI.readsRegister(CurReg, TRI); 4558 NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); 4559 4560 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; 4561 CurUndef = !MI.readsRegister(CurReg, TRI); 4562 NewMIB.addReg(CurReg, getUndefRegState(CurUndef)) 4563 .addImm(1) 4564 .add(predOps(ARMCC::AL)); 4565 4566 if (SrcLane == DstLane) 4567 NewMIB.addReg(SrcReg, RegState::Implicit); 4568 4569 MI.setDesc(get(ARM::VEXTd32)); 4570 MIB.addReg(DDst, RegState::Define); 4571 4572 // On the second instruction, DDst has definitely been defined above, so 4573 // it is not <undef>. DSrc, if present, can be <undef> as above. 4574 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; 4575 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); 4576 MIB.addReg(CurReg, getUndefRegState(CurUndef)); 4577 4578 CurReg = SrcLane == 0 && DstLane == 1 ? 
DSrc : DDst;
4579     CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
4580     MIB.addReg(CurReg, getUndefRegState(CurUndef))
4581       .addImm(1)
4582       .add(predOps(ARMCC::AL));
4583
4584     if (SrcLane != DstLane)
4585       MIB.addReg(SrcReg, RegState::Implicit);
4586
4587     // As before, the original destination is no longer represented, so add it
4588     // implicitly.
4589     MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4590     if (ImplicitSReg != 0)
4591       MIB.addReg(ImplicitSReg, RegState::Implicit);
4592     break;
4593   }
4594   }
4595 }
4596
4597 //===----------------------------------------------------------------------===//
4598 // Partial register updates
4599 //===----------------------------------------------------------------------===//
4600 //
4601 // Swift renames NEON registers with 64-bit granularity. That means any
4602 // instruction writing an S-reg implicitly reads the containing D-reg. The
4603 // problem is mostly avoided by translating f32 operations to v2f32 operations
4604 // on D-registers, but f32 loads are still a problem.
4605 //
4606 // These instructions can load an f32 into a NEON register:
4607 //
4608 // VLDRS - Only writes S, partial D update.
4609 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4610 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4611 //
4612 // FCONSTD can be used as a dependency-breaking instruction.
4613 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
4614     const MachineInstr &MI, unsigned OpNum,
4615     const TargetRegisterInfo *TRI) const {
4616   auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
4617   if (!PartialUpdateClearance)
4618     return 0;
4619
4620   assert(TRI && "Need TRI instance");
4621
4622   const MachineOperand &MO = MI.getOperand(OpNum);
4623   if (MO.readsReg())
4624     return 0;
4625   unsigned Reg = MO.getReg();
4626   int UseOp = -1;
4627
4628   switch (MI.getOpcode()) {
4629   // Normal instructions writing only an S-register.
4630   case ARM::VLDRS:
4631   case ARM::FCONSTS:
4632   case ARM::VMOVSR:
4633   case ARM::VMOVv8i8:
4634   case ARM::VMOVv4i16:
4635   case ARM::VMOVv2i32:
4636   case ARM::VMOVv2f32:
4637   case ARM::VMOVv1i64:
4638     UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
4639     break;
4640
4641   // Explicitly reads the dependency.
4642   case ARM::VLD1LNd32:
4643     UseOp = 3;
4644     break;
4645   default:
4646     return 0;
4647   }
4648
4649   // If this instruction actually reads a value from Reg, there is no unwanted
4650   // dependency.
4651   if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
4652     return 0;
4653
4654   // We must be able to clobber the whole D-reg.
4655   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4656     // Virtual register must be a foo:ssub_0<def,undef> operand.
4657     if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
4658       return 0;
4659   } else if (ARM::SPRRegClass.contains(Reg)) {
4660     // Physical register: MI must define the full D-reg.
4661     unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4662                                              &ARM::DPRRegClass);
4663     if (!DReg || !MI.definesRegister(DReg, TRI))
4664       return 0;
4665   }
4666
4667   // MI has an unwanted D-register dependency.
4668   // Avoid defs in the previous N instructions.
4669   return PartialUpdateClearance;
4670 }
4671
4672 // Break a partial register dependency after getPartialRegUpdateClearance
4673 // returned non-zero.
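// The idiom: write the whole D-register first with an FCONSTD (the immediate
// is irrelevant) so that the partial write which follows no longer depends on
// the stale contents of the other lane.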
4674 void ARMBaseInstrInfo::breakPartialRegDependency( 4675 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { 4676 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def"); 4677 assert(TRI && "Need TRI instance"); 4678 4679 const MachineOperand &MO = MI.getOperand(OpNum); 4680 unsigned Reg = MO.getReg(); 4681 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 4682 "Can't break virtual register dependencies."); 4683 unsigned DReg = Reg; 4684 4685 // If MI defines an S-reg, find the corresponding D super-register. 4686 if (ARM::SPRRegClass.contains(Reg)) { 4687 DReg = ARM::D0 + (Reg - ARM::S0) / 2; 4688 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); 4689 } 4690 4691 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); 4692 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); 4693 4694 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines 4695 // the full D-register by loading the same value to both lanes. The 4696 // instruction is micro-coded with 2 uops, so don't do this until we can 4697 // properly schedule micro-coded instructions. The dispatcher stalls cause 4698 // too big regressions. 4699 4700 // Insert the dependency-breaking FCONSTD before MI. 4701 // 96 is the encoding of 0.5, but the actual value doesn't matter here. 4702 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) 4703 .addImm(96) 4704 .add(predOps(ARMCC::AL)); 4705 MI.addRegisterKilled(DReg, TRI, true); 4706 } 4707 4708 bool ARMBaseInstrInfo::hasNOP() const { 4709 return Subtarget.getFeatureBits()[ARM::HasV6KOps]; 4710 } 4711 4712 bool ARMBaseInstrInfo::isTailCall(const MachineInstr &Inst) const 4713 { 4714 switch (Inst.getOpcode()) { 4715 case ARM::TAILJMPd: 4716 case ARM::TAILJMPr: 4717 case ARM::TCRETURNdi: 4718 case ARM::TCRETURNri: 4719 return true; 4720 default: 4721 return false; 4722 } 4723 } 4724 4725 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { 4726 if (MI->getNumOperands() < 4) 4727 return true; 4728 unsigned ShOpVal = MI->getOperand(3).getImm(); 4729 unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); 4730 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 4731 if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || 4732 ((ShImm == 1 || ShImm == 2) && 4733 ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) 4734 return true; 4735 4736 return false; 4737 } 4738 4739 bool ARMBaseInstrInfo::getRegSequenceLikeInputs( 4740 const MachineInstr &MI, unsigned DefIdx, 4741 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { 4742 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 4743 assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); 4744 4745 switch (MI.getOpcode()) { 4746 case ARM::VMOVDRR: 4747 // dX = VMOVDRR rY, rZ 4748 // is the same as: 4749 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 4750 // Populate the InputRegs accordingly. 
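    // Each entry records (register, sub-register index it already carries,
    // ssub lane it occupies in the D-register), mirroring the REG_SEQUENCE
    // form shown above.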
4751 // rY 4752 const MachineOperand *MOReg = &MI.getOperand(1); 4753 InputRegs.push_back( 4754 RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); 4755 // rZ 4756 MOReg = &MI.getOperand(2); 4757 InputRegs.push_back( 4758 RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); 4759 return true; 4760 } 4761 llvm_unreachable("Target dependent opcode missing"); 4762 } 4763 4764 bool ARMBaseInstrInfo::getExtractSubregLikeInputs( 4765 const MachineInstr &MI, unsigned DefIdx, 4766 RegSubRegPairAndIdx &InputReg) const { 4767 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 4768 assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); 4769 4770 switch (MI.getOpcode()) { 4771 case ARM::VMOVRRD: 4772 // rX, rY = VMOVRRD dZ 4773 // is the same as: 4774 // rX = EXTRACT_SUBREG dZ, ssub_0 4775 // rY = EXTRACT_SUBREG dZ, ssub_1 4776 const MachineOperand &MOReg = MI.getOperand(2); 4777 InputReg.Reg = MOReg.getReg(); 4778 InputReg.SubReg = MOReg.getSubReg(); 4779 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; 4780 return true; 4781 } 4782 llvm_unreachable("Target dependent opcode missing"); 4783 } 4784 4785 bool ARMBaseInstrInfo::getInsertSubregLikeInputs( 4786 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, 4787 RegSubRegPairAndIdx &InsertedReg) const { 4788 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); 4789 assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); 4790 4791 switch (MI.getOpcode()) { 4792 case ARM::VSETLNi32: 4793 // dX = VSETLNi32 dY, rZ, imm 4794 const MachineOperand &MOBaseReg = MI.getOperand(1); 4795 const MachineOperand &MOInsertedReg = MI.getOperand(2); 4796 const MachineOperand &MOIndex = MI.getOperand(3); 4797 BaseReg.Reg = MOBaseReg.getReg(); 4798 BaseReg.SubReg = MOBaseReg.getSubReg(); 4799 4800 InsertedReg.Reg = MOInsertedReg.getReg(); 4801 InsertedReg.SubReg = MOInsertedReg.getSubReg(); 4802 InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; 4803 return true; 4804 } 4805 llvm_unreachable("Target dependent opcode missing"); 4806 } 4807