12cab237bSDimitry Andric //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky //                     The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
1097bc6c73SDimitry Andric /// \file This file contains a pass that performs load / store related peephole
1197bc6c73SDimitry Andric /// optimizations. This pass should be run after register allocation.
12f22ef01cSRoman Divacky //
13f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
14f22ef01cSRoman Divacky 
15f22ef01cSRoman Divacky #include "ARM.h"
16f22ef01cSRoman Divacky #include "ARMBaseInstrInfo.h"
17dff0c46cSDimitry Andric #include "ARMBaseRegisterInfo.h"
1891bc56edSDimitry Andric #include "ARMISelLowering.h"
19f22ef01cSRoman Divacky #include "ARMMachineFunctionInfo.h"
2091bc56edSDimitry Andric #include "ARMSubtarget.h"
216122f3e6SDimitry Andric #include "MCTargetDesc/ARMAddressingModes.h"
222cab237bSDimitry Andric #include "MCTargetDesc/ARMBaseInfo.h"
232cab237bSDimitry Andric #include "Utils/ARMBaseInfo.h"
242cab237bSDimitry Andric #include "llvm/ADT/ArrayRef.h"
25139f7f9bSDimitry Andric #include "llvm/ADT/DenseMap.h"
262cab237bSDimitry Andric #include "llvm/ADT/DenseSet.h"
27139f7f9bSDimitry Andric #include "llvm/ADT/STLExtras.h"
28139f7f9bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
29139f7f9bSDimitry Andric #include "llvm/ADT/SmallSet.h"
30139f7f9bSDimitry Andric #include "llvm/ADT/SmallVector.h"
31139f7f9bSDimitry Andric #include "llvm/ADT/Statistic.h"
322cab237bSDimitry Andric #include "llvm/ADT/iterator_range.h"
332cab237bSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
34db17bf38SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h"
35f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineBasicBlock.h"
362cab237bSDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
37f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineFunctionPass.h"
38f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineInstr.h"
39f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineInstrBuilder.h"
402cab237bSDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
412cab237bSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
42f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineRegisterInfo.h"
43875ed548SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
442cab237bSDimitry Andric #include "llvm/CodeGen/TargetFrameLowering.h"
452cab237bSDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
462cab237bSDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
472cab237bSDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
482cab237bSDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
49139f7f9bSDimitry Andric #include "llvm/IR/DataLayout.h"
502cab237bSDimitry Andric #include "llvm/IR/DebugLoc.h"
51139f7f9bSDimitry Andric #include "llvm/IR/DerivedTypes.h"
52139f7f9bSDimitry Andric #include "llvm/IR/Function.h"
532cab237bSDimitry Andric #include "llvm/IR/Type.h"
542cab237bSDimitry Andric #include "llvm/MC/MCInstrDesc.h"
552cab237bSDimitry Andric #include "llvm/Pass.h"
56875ed548SDimitry Andric #include "llvm/Support/Allocator.h"
572cab237bSDimitry Andric #include "llvm/Support/CommandLine.h"
58139f7f9bSDimitry Andric #include "llvm/Support/Debug.h"
59139f7f9bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
60ff0cc061SDimitry Andric #include "llvm/Support/raw_ostream.h"
612cab237bSDimitry Andric #include <algorithm>
622cab237bSDimitry Andric #include <cassert>
632cab237bSDimitry Andric #include <cstddef>
642cab237bSDimitry Andric #include <cstdlib>
652cab237bSDimitry Andric #include <iterator>
662cab237bSDimitry Andric #include <limits>
672cab237bSDimitry Andric #include <utility>
682cab237bSDimitry Andric 
69f22ef01cSRoman Divacky using namespace llvm;
70f22ef01cSRoman Divacky 
7191bc56edSDimitry Andric #define DEBUG_TYPE "arm-ldst-opt"
7291bc56edSDimitry Andric 
73f22ef01cSRoman Divacky STATISTIC(NumLDMGened , "Number of ldm instructions generated");
74f22ef01cSRoman Divacky STATISTIC(NumSTMGened , "Number of stm instructions generated");
75f22ef01cSRoman Divacky STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
76f22ef01cSRoman Divacky STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
77f22ef01cSRoman Divacky STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
78f22ef01cSRoman Divacky STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
79f22ef01cSRoman Divacky STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
80f22ef01cSRoman Divacky STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
81f22ef01cSRoman Divacky STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
82f22ef01cSRoman Divacky STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
83f22ef01cSRoman Divacky STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
84f22ef01cSRoman Divacky 
853ca95b02SDimitry Andric /// This switch disables formation of double/multi instructions that could
863ca95b02SDimitry Andric /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
873ca95b02SDimitry Andric /// disabled. This can be used to create libraries that are robust even when
883ca95b02SDimitry Andric /// users provoke undefined behaviour by supplying misaligned pointers.
893ca95b02SDimitry Andric /// \see mayCombineMisaligned()
903ca95b02SDimitry Andric static cl::opt<bool>
913ca95b02SDimitry Andric AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
923ca95b02SDimitry Andric     cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
937d523365SDimitry Andric 
947d523365SDimitry Andric #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
957d523365SDimitry Andric 
96f22ef01cSRoman Divacky namespace {
972cab237bSDimitry Andric 
9897bc6c73SDimitry Andric   /// Post- register allocation pass the combine load / store instructions to
9997bc6c73SDimitry Andric   /// form ldm / stm instructions.
100f22ef01cSRoman Divacky   struct ARMLoadStoreOpt : public MachineFunctionPass {
101f22ef01cSRoman Divacky     static char ID;
102f22ef01cSRoman Divacky 
103875ed548SDimitry Andric     const MachineFunction *MF;
104f22ef01cSRoman Divacky     const TargetInstrInfo *TII;
105f22ef01cSRoman Divacky     const TargetRegisterInfo *TRI;
106dff0c46cSDimitry Andric     const ARMSubtarget *STI;
10791bc56edSDimitry Andric     const TargetLowering *TL;
108f22ef01cSRoman Divacky     ARMFunctionInfo *AFI;
109875ed548SDimitry Andric     LivePhysRegs LiveRegs;
110875ed548SDimitry Andric     RegisterClassInfo RegClassInfo;
111875ed548SDimitry Andric     MachineBasicBlock::const_iterator LiveRegPos;
112875ed548SDimitry Andric     bool LiveRegsValid;
113875ed548SDimitry Andric     bool RegClassInfoValid;
11491bc56edSDimitry Andric     bool isThumb1, isThumb2;
115f22ef01cSRoman Divacky 
ARMLoadStoreOpt__anon9023e32a0111::ARMLoadStoreOpt1162cab237bSDimitry Andric     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
1172cab237bSDimitry Andric 
11891bc56edSDimitry Andric     bool runOnMachineFunction(MachineFunction &Fn) override;
119f22ef01cSRoman Divacky 
getRequiredProperties__anon9023e32a0111::ARMLoadStoreOpt1203ca95b02SDimitry Andric     MachineFunctionProperties getRequiredProperties() const override {
1213ca95b02SDimitry Andric       return MachineFunctionProperties().set(
122d88c1a5aSDimitry Andric           MachineFunctionProperties::Property::NoVRegs);
1233ca95b02SDimitry Andric     }
1243ca95b02SDimitry Andric 
getPassName__anon9023e32a0111::ARMLoadStoreOpt125d88c1a5aSDimitry Andric     StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
126f22ef01cSRoman Divacky 
127f22ef01cSRoman Divacky   private:
128875ed548SDimitry Andric     /// A set of load/store MachineInstrs with same base register sorted by
129875ed548SDimitry Andric     /// offset.
130f22ef01cSRoman Divacky     struct MemOpQueueEntry {
131875ed548SDimitry Andric       MachineInstr *MI;
132875ed548SDimitry Andric       int Offset;        ///< Load/Store offset.
133875ed548SDimitry Andric       unsigned Position; ///< Position as counted from end of basic block.
1342cab237bSDimitry Andric 
MemOpQueueEntry__anon9023e32a0111::ARMLoadStoreOpt::MemOpQueueEntry1353ca95b02SDimitry Andric       MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
1363ca95b02SDimitry Andric           : MI(&MI), Offset(Offset), Position(Position) {}
137f22ef01cSRoman Divacky     };
1382cab237bSDimitry Andric     using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
139f22ef01cSRoman Divacky 
140875ed548SDimitry Andric     /// A set of MachineInstrs that fulfill (nearly all) conditions to get
141875ed548SDimitry Andric     /// merged into a LDM/STM.
142875ed548SDimitry Andric     struct MergeCandidate {
143875ed548SDimitry Andric       /// List of instructions ordered by load/store offset.
144875ed548SDimitry Andric       SmallVector<MachineInstr*, 4> Instrs;
1452cab237bSDimitry Andric 
146875ed548SDimitry Andric       /// Index in Instrs of the instruction being latest in the schedule.
147875ed548SDimitry Andric       unsigned LatestMIIdx;
1482cab237bSDimitry Andric 
149875ed548SDimitry Andric       /// Index in Instrs of the instruction being earliest in the schedule.
150875ed548SDimitry Andric       unsigned EarliestMIIdx;
1512cab237bSDimitry Andric 
152875ed548SDimitry Andric       /// Index into the basic block where the merged instruction will be
153875ed548SDimitry Andric       /// inserted. (See MemOpQueueEntry.Position)
154875ed548SDimitry Andric       unsigned InsertPos;
1552cab237bSDimitry Andric 
156875ed548SDimitry Andric       /// Whether the instructions can be merged into a ldm/stm instruction.
157875ed548SDimitry Andric       bool CanMergeToLSMulti;
1582cab237bSDimitry Andric 
159875ed548SDimitry Andric       /// Whether the instructions can be merged into a ldrd/strd instruction.
160875ed548SDimitry Andric       bool CanMergeToLSDouble;
161875ed548SDimitry Andric     };
162875ed548SDimitry Andric     SpecificBumpPtrAllocator<MergeCandidate> Allocator;
163875ed548SDimitry Andric     SmallVector<const MergeCandidate*,4> Candidates;
1647d523365SDimitry Andric     SmallVector<MachineInstr*,4> MergeBaseCandidates;
165875ed548SDimitry Andric 
166875ed548SDimitry Andric     void moveLiveRegsBefore(const MachineBasicBlock &MBB,
167875ed548SDimitry Andric                             MachineBasicBlock::const_iterator Before);
168875ed548SDimitry Andric     unsigned findFreeReg(const TargetRegisterClass &RegClass);
16991bc56edSDimitry Andric     void UpdateBaseRegUses(MachineBasicBlock &MBB,
1703ca95b02SDimitry Andric                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1713ca95b02SDimitry Andric                            unsigned Base, unsigned WordOffset,
17291bc56edSDimitry Andric                            ARMCC::CondCodes Pred, unsigned PredReg);
1733ca95b02SDimitry Andric     MachineInstr *CreateLoadStoreMulti(
1743ca95b02SDimitry Andric         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
1753ca95b02SDimitry Andric         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
1763ca95b02SDimitry Andric         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
1773ca95b02SDimitry Andric         ArrayRef<std::pair<unsigned, bool>> Regs);
1783ca95b02SDimitry Andric     MachineInstr *CreateLoadStoreDouble(
1793ca95b02SDimitry Andric         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
1803ca95b02SDimitry Andric         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
1813ca95b02SDimitry Andric         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
1823ca95b02SDimitry Andric         ArrayRef<std::pair<unsigned, bool>> Regs) const;
183875ed548SDimitry Andric     void FormCandidates(const MemOpQueue &MemOps);
184875ed548SDimitry Andric     MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
185f22ef01cSRoman Divacky     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
186f22ef01cSRoman Divacky                              MachineBasicBlock::iterator &MBBI);
187875ed548SDimitry Andric     bool MergeBaseUpdateLoadStore(MachineInstr *MI);
188875ed548SDimitry Andric     bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
1897d523365SDimitry Andric     bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
190f22ef01cSRoman Divacky     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
191f22ef01cSRoman Divacky     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
1927d523365SDimitry Andric     bool CombineMovBx(MachineBasicBlock &MBB);
193f22ef01cSRoman Divacky   };
1942cab237bSDimitry Andric 
1952cab237bSDimitry Andric } // end anonymous namespace
1962cab237bSDimitry Andric 
197f22ef01cSRoman Divacky char ARMLoadStoreOpt::ID = 0;
198f22ef01cSRoman Divacky 
1993ca95b02SDimitry Andric INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
2003ca95b02SDimitry Andric                 false)
2017d523365SDimitry Andric 
definesCPSR(const MachineInstr & MI)2023ca95b02SDimitry Andric static bool definesCPSR(const MachineInstr &MI) {
2033ca95b02SDimitry Andric   for (const auto &MO : MI.operands()) {
20439d628a0SDimitry Andric     if (!MO.isReg())
20539d628a0SDimitry Andric       continue;
20639d628a0SDimitry Andric     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
20739d628a0SDimitry Andric       // If the instruction has live CPSR def, then it's not safe to fold it
20839d628a0SDimitry Andric       // into load / store.
20939d628a0SDimitry Andric       return true;
21039d628a0SDimitry Andric   }
21139d628a0SDimitry Andric 
21239d628a0SDimitry Andric   return false;
21339d628a0SDimitry Andric }
21439d628a0SDimitry Andric 
getMemoryOpOffset(const MachineInstr & MI)2153ca95b02SDimitry Andric static int getMemoryOpOffset(const MachineInstr &MI) {
2163ca95b02SDimitry Andric   unsigned Opcode = MI.getOpcode();
21739d628a0SDimitry Andric   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
2183ca95b02SDimitry Andric   unsigned NumOperands = MI.getDesc().getNumOperands();
2193ca95b02SDimitry Andric   unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
22039d628a0SDimitry Andric 
22139d628a0SDimitry Andric   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
22239d628a0SDimitry Andric       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
22339d628a0SDimitry Andric       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
22439d628a0SDimitry Andric       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
22539d628a0SDimitry Andric     return OffField;
22639d628a0SDimitry Andric 
22739d628a0SDimitry Andric   // Thumb1 immediate offsets are scaled by 4
228ff0cc061SDimitry Andric   if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
229ff0cc061SDimitry Andric       Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
23039d628a0SDimitry Andric     return OffField * 4;
23139d628a0SDimitry Andric 
23239d628a0SDimitry Andric   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
23339d628a0SDimitry Andric     : ARM_AM::getAM5Offset(OffField) * 4;
23439d628a0SDimitry Andric   ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
23539d628a0SDimitry Andric     : ARM_AM::getAM5Op(OffField);
23639d628a0SDimitry Andric 
23739d628a0SDimitry Andric   if (Op == ARM_AM::sub)
23839d628a0SDimitry Andric     return -Offset;
23939d628a0SDimitry Andric 
24039d628a0SDimitry Andric   return Offset;
24139d628a0SDimitry Andric }
24239d628a0SDimitry Andric 
getLoadStoreBaseOp(const MachineInstr & MI)243875ed548SDimitry Andric static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
244875ed548SDimitry Andric   return MI.getOperand(1);
245875ed548SDimitry Andric }
246875ed548SDimitry Andric 
getLoadStoreRegOp(const MachineInstr & MI)247875ed548SDimitry Andric static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
248875ed548SDimitry Andric   return MI.getOperand(0);
249875ed548SDimitry Andric }
250875ed548SDimitry Andric 
getLoadStoreMultipleOpcode(unsigned Opcode,ARM_AM::AMSubMode Mode)251ff0cc061SDimitry Andric static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
252f22ef01cSRoman Divacky   switch (Opcode) {
2532754fe60SDimitry Andric   default: llvm_unreachable("Unhandled opcode!");
2542754fe60SDimitry Andric   case ARM::LDRi12:
255ffd1746dSEd Schouten     ++NumLDMGened;
2562754fe60SDimitry Andric     switch (Mode) {
2572754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
2582754fe60SDimitry Andric     case ARM_AM::ia: return ARM::LDMIA;
2592754fe60SDimitry Andric     case ARM_AM::da: return ARM::LDMDA;
2602754fe60SDimitry Andric     case ARM_AM::db: return ARM::LDMDB;
2612754fe60SDimitry Andric     case ARM_AM::ib: return ARM::LDMIB;
2622754fe60SDimitry Andric     }
2632754fe60SDimitry Andric   case ARM::STRi12:
264ffd1746dSEd Schouten     ++NumSTMGened;
2652754fe60SDimitry Andric     switch (Mode) {
2662754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
2672754fe60SDimitry Andric     case ARM_AM::ia: return ARM::STMIA;
2682754fe60SDimitry Andric     case ARM_AM::da: return ARM::STMDA;
2692754fe60SDimitry Andric     case ARM_AM::db: return ARM::STMDB;
2702754fe60SDimitry Andric     case ARM_AM::ib: return ARM::STMIB;
2712754fe60SDimitry Andric     }
27291bc56edSDimitry Andric   case ARM::tLDRi:
273ff0cc061SDimitry Andric   case ARM::tLDRspi:
27491bc56edSDimitry Andric     // tLDMIA is writeback-only - unless the base register is in the input
27591bc56edSDimitry Andric     // reglist.
27691bc56edSDimitry Andric     ++NumLDMGened;
27791bc56edSDimitry Andric     switch (Mode) {
27891bc56edSDimitry Andric     default: llvm_unreachable("Unhandled submode!");
27991bc56edSDimitry Andric     case ARM_AM::ia: return ARM::tLDMIA;
28091bc56edSDimitry Andric     }
28191bc56edSDimitry Andric   case ARM::tSTRi:
282ff0cc061SDimitry Andric   case ARM::tSTRspi:
28391bc56edSDimitry Andric     // There is no non-writeback tSTMIA either.
28491bc56edSDimitry Andric     ++NumSTMGened;
28591bc56edSDimitry Andric     switch (Mode) {
28691bc56edSDimitry Andric     default: llvm_unreachable("Unhandled submode!");
28791bc56edSDimitry Andric     case ARM_AM::ia: return ARM::tSTMIA_UPD;
28891bc56edSDimitry Andric     }
289f22ef01cSRoman Divacky   case ARM::t2LDRi8:
290f22ef01cSRoman Divacky   case ARM::t2LDRi12:
291ffd1746dSEd Schouten     ++NumLDMGened;
2922754fe60SDimitry Andric     switch (Mode) {
2932754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
2942754fe60SDimitry Andric     case ARM_AM::ia: return ARM::t2LDMIA;
2952754fe60SDimitry Andric     case ARM_AM::db: return ARM::t2LDMDB;
2962754fe60SDimitry Andric     }
297f22ef01cSRoman Divacky   case ARM::t2STRi8:
298f22ef01cSRoman Divacky   case ARM::t2STRi12:
299ffd1746dSEd Schouten     ++NumSTMGened;
3002754fe60SDimitry Andric     switch (Mode) {
3012754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
3022754fe60SDimitry Andric     case ARM_AM::ia: return ARM::t2STMIA;
3032754fe60SDimitry Andric     case ARM_AM::db: return ARM::t2STMDB;
3042754fe60SDimitry Andric     }
305f22ef01cSRoman Divacky   case ARM::VLDRS:
306ffd1746dSEd Schouten     ++NumVLDMGened;
3072754fe60SDimitry Andric     switch (Mode) {
3082754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
3092754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VLDMSIA;
3103b0f4066SDimitry Andric     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
3112754fe60SDimitry Andric     }
312f22ef01cSRoman Divacky   case ARM::VSTRS:
313ffd1746dSEd Schouten     ++NumVSTMGened;
3142754fe60SDimitry Andric     switch (Mode) {
3152754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
3162754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VSTMSIA;
3173b0f4066SDimitry Andric     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
3182754fe60SDimitry Andric     }
319f22ef01cSRoman Divacky   case ARM::VLDRD:
320ffd1746dSEd Schouten     ++NumVLDMGened;
3212754fe60SDimitry Andric     switch (Mode) {
3222754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
3232754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VLDMDIA;
3243b0f4066SDimitry Andric     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
3252754fe60SDimitry Andric     }
326f22ef01cSRoman Divacky   case ARM::VSTRD:
327ffd1746dSEd Schouten     ++NumVSTMGened;
3282754fe60SDimitry Andric     switch (Mode) {
3292754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
3302754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VSTMDIA;
3313b0f4066SDimitry Andric     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
332f22ef01cSRoman Divacky     }
3332754fe60SDimitry Andric   }
334f22ef01cSRoman Divacky }
335f22ef01cSRoman Divacky 
getLoadStoreMultipleSubMode(unsigned Opcode)33697bc6c73SDimitry Andric static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
3372754fe60SDimitry Andric   switch (Opcode) {
3382754fe60SDimitry Andric   default: llvm_unreachable("Unhandled opcode!");
3392754fe60SDimitry Andric   case ARM::LDMIA_RET:
3402754fe60SDimitry Andric   case ARM::LDMIA:
3412754fe60SDimitry Andric   case ARM::LDMIA_UPD:
3422754fe60SDimitry Andric   case ARM::STMIA:
3432754fe60SDimitry Andric   case ARM::STMIA_UPD:
34491bc56edSDimitry Andric   case ARM::tLDMIA:
34591bc56edSDimitry Andric   case ARM::tLDMIA_UPD:
34691bc56edSDimitry Andric   case ARM::tSTMIA_UPD:
3472754fe60SDimitry Andric   case ARM::t2LDMIA_RET:
3482754fe60SDimitry Andric   case ARM::t2LDMIA:
3492754fe60SDimitry Andric   case ARM::t2LDMIA_UPD:
3502754fe60SDimitry Andric   case ARM::t2STMIA:
3512754fe60SDimitry Andric   case ARM::t2STMIA_UPD:
3522754fe60SDimitry Andric   case ARM::VLDMSIA:
3532754fe60SDimitry Andric   case ARM::VLDMSIA_UPD:
3542754fe60SDimitry Andric   case ARM::VSTMSIA:
3552754fe60SDimitry Andric   case ARM::VSTMSIA_UPD:
3562754fe60SDimitry Andric   case ARM::VLDMDIA:
3572754fe60SDimitry Andric   case ARM::VLDMDIA_UPD:
3582754fe60SDimitry Andric   case ARM::VSTMDIA:
3592754fe60SDimitry Andric   case ARM::VSTMDIA_UPD:
3602754fe60SDimitry Andric     return ARM_AM::ia;
3612754fe60SDimitry Andric 
3622754fe60SDimitry Andric   case ARM::LDMDA:
3632754fe60SDimitry Andric   case ARM::LDMDA_UPD:
3642754fe60SDimitry Andric   case ARM::STMDA:
3652754fe60SDimitry Andric   case ARM::STMDA_UPD:
3662754fe60SDimitry Andric     return ARM_AM::da;
3672754fe60SDimitry Andric 
3682754fe60SDimitry Andric   case ARM::LDMDB:
3692754fe60SDimitry Andric   case ARM::LDMDB_UPD:
3702754fe60SDimitry Andric   case ARM::STMDB:
3712754fe60SDimitry Andric   case ARM::STMDB_UPD:
3722754fe60SDimitry Andric   case ARM::t2LDMDB:
3732754fe60SDimitry Andric   case ARM::t2LDMDB_UPD:
3742754fe60SDimitry Andric   case ARM::t2STMDB:
3752754fe60SDimitry Andric   case ARM::t2STMDB_UPD:
3762754fe60SDimitry Andric   case ARM::VLDMSDB_UPD:
3772754fe60SDimitry Andric   case ARM::VSTMSDB_UPD:
3782754fe60SDimitry Andric   case ARM::VLDMDDB_UPD:
3792754fe60SDimitry Andric   case ARM::VSTMDDB_UPD:
3802754fe60SDimitry Andric     return ARM_AM::db;
3812754fe60SDimitry Andric 
3822754fe60SDimitry Andric   case ARM::LDMIB:
3832754fe60SDimitry Andric   case ARM::LDMIB_UPD:
3842754fe60SDimitry Andric   case ARM::STMIB:
3852754fe60SDimitry Andric   case ARM::STMIB_UPD:
3862754fe60SDimitry Andric     return ARM_AM::ib;
3872754fe60SDimitry Andric   }
3882754fe60SDimitry Andric }
3892754fe60SDimitry Andric 
isT1i32Load(unsigned Opc)39091bc56edSDimitry Andric static bool isT1i32Load(unsigned Opc) {
391ff0cc061SDimitry Andric   return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
39291bc56edSDimitry Andric }
39391bc56edSDimitry Andric 
isT2i32Load(unsigned Opc)394f22ef01cSRoman Divacky static bool isT2i32Load(unsigned Opc) {
395f22ef01cSRoman Divacky   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
396f22ef01cSRoman Divacky }
397f22ef01cSRoman Divacky 
isi32Load(unsigned Opc)398f22ef01cSRoman Divacky static bool isi32Load(unsigned Opc) {
39991bc56edSDimitry Andric   return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
40091bc56edSDimitry Andric }
40191bc56edSDimitry Andric 
isT1i32Store(unsigned Opc)40291bc56edSDimitry Andric static bool isT1i32Store(unsigned Opc) {
403ff0cc061SDimitry Andric   return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
404f22ef01cSRoman Divacky }
405f22ef01cSRoman Divacky 
isT2i32Store(unsigned Opc)406f22ef01cSRoman Divacky static bool isT2i32Store(unsigned Opc) {
407f22ef01cSRoman Divacky   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
408f22ef01cSRoman Divacky }
409f22ef01cSRoman Divacky 
isi32Store(unsigned Opc)410f22ef01cSRoman Divacky static bool isi32Store(unsigned Opc) {
41191bc56edSDimitry Andric   return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
41291bc56edSDimitry Andric }
41391bc56edSDimitry Andric 
isLoadSingle(unsigned Opc)414875ed548SDimitry Andric static bool isLoadSingle(unsigned Opc) {
415875ed548SDimitry Andric   return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
416875ed548SDimitry Andric }
417875ed548SDimitry Andric 
getImmScale(unsigned Opc)41891bc56edSDimitry Andric static unsigned getImmScale(unsigned Opc) {
41991bc56edSDimitry Andric   switch (Opc) {
42091bc56edSDimitry Andric   default: llvm_unreachable("Unhandled opcode!");
42191bc56edSDimitry Andric   case ARM::tLDRi:
42291bc56edSDimitry Andric   case ARM::tSTRi:
423ff0cc061SDimitry Andric   case ARM::tLDRspi:
424ff0cc061SDimitry Andric   case ARM::tSTRspi:
42591bc56edSDimitry Andric     return 1;
42691bc56edSDimitry Andric   case ARM::tLDRHi:
42791bc56edSDimitry Andric   case ARM::tSTRHi:
42891bc56edSDimitry Andric     return 2;
42991bc56edSDimitry Andric   case ARM::tLDRBi:
43091bc56edSDimitry Andric   case ARM::tSTRBi:
43191bc56edSDimitry Andric     return 4;
43291bc56edSDimitry Andric   }
43391bc56edSDimitry Andric }
43491bc56edSDimitry Andric 
getLSMultipleTransferSize(const MachineInstr * MI)435875ed548SDimitry Andric static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
436f22ef01cSRoman Divacky   switch (MI->getOpcode()) {
437f22ef01cSRoman Divacky   default: return 0;
4382754fe60SDimitry Andric   case ARM::LDRi12:
4392754fe60SDimitry Andric   case ARM::STRi12:
44091bc56edSDimitry Andric   case ARM::tLDRi:
44191bc56edSDimitry Andric   case ARM::tSTRi:
442ff0cc061SDimitry Andric   case ARM::tLDRspi:
443ff0cc061SDimitry Andric   case ARM::tSTRspi:
444f22ef01cSRoman Divacky   case ARM::t2LDRi8:
445f22ef01cSRoman Divacky   case ARM::t2LDRi12:
446f22ef01cSRoman Divacky   case ARM::t2STRi8:
447f22ef01cSRoman Divacky   case ARM::t2STRi12:
448f22ef01cSRoman Divacky   case ARM::VLDRS:
449f22ef01cSRoman Divacky   case ARM::VSTRS:
450f22ef01cSRoman Divacky     return 4;
451f22ef01cSRoman Divacky   case ARM::VLDRD:
452f22ef01cSRoman Divacky   case ARM::VSTRD:
453f22ef01cSRoman Divacky     return 8;
4542754fe60SDimitry Andric   case ARM::LDMIA:
4552754fe60SDimitry Andric   case ARM::LDMDA:
4562754fe60SDimitry Andric   case ARM::LDMDB:
4572754fe60SDimitry Andric   case ARM::LDMIB:
4582754fe60SDimitry Andric   case ARM::STMIA:
4592754fe60SDimitry Andric   case ARM::STMDA:
4602754fe60SDimitry Andric   case ARM::STMDB:
4612754fe60SDimitry Andric   case ARM::STMIB:
46291bc56edSDimitry Andric   case ARM::tLDMIA:
46391bc56edSDimitry Andric   case ARM::tLDMIA_UPD:
46491bc56edSDimitry Andric   case ARM::tSTMIA_UPD:
4652754fe60SDimitry Andric   case ARM::t2LDMIA:
4662754fe60SDimitry Andric   case ARM::t2LDMDB:
4672754fe60SDimitry Andric   case ARM::t2STMIA:
4682754fe60SDimitry Andric   case ARM::t2STMDB:
4692754fe60SDimitry Andric   case ARM::VLDMSIA:
4702754fe60SDimitry Andric   case ARM::VSTMSIA:
4712754fe60SDimitry Andric     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
4722754fe60SDimitry Andric   case ARM::VLDMDIA:
4732754fe60SDimitry Andric   case ARM::VSTMDIA:
4742754fe60SDimitry Andric     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
475f22ef01cSRoman Divacky   }
476f22ef01cSRoman Divacky }
477f22ef01cSRoman Divacky 
478875ed548SDimitry Andric /// Update future uses of the base register with the offset introduced
479875ed548SDimitry Andric /// due to writeback. This function only works on Thumb1.
UpdateBaseRegUses(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,unsigned Base,unsigned WordOffset,ARMCC::CondCodes Pred,unsigned PredReg)4803ca95b02SDimitry Andric void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
481875ed548SDimitry Andric                                         MachineBasicBlock::iterator MBBI,
4823ca95b02SDimitry Andric                                         const DebugLoc &DL, unsigned Base,
483875ed548SDimitry Andric                                         unsigned WordOffset,
4843ca95b02SDimitry Andric                                         ARMCC::CondCodes Pred,
4853ca95b02SDimitry Andric                                         unsigned PredReg) {
486875ed548SDimitry Andric   assert(isThumb1 && "Can only update base register uses for Thumb1!");
487875ed548SDimitry Andric   // Start updating any instructions with immediate offsets. Insert a SUB before
488875ed548SDimitry Andric   // the first non-updateable instruction (if any).
489875ed548SDimitry Andric   for (; MBBI != MBB.end(); ++MBBI) {
490875ed548SDimitry Andric     bool InsertSub = false;
491875ed548SDimitry Andric     unsigned Opc = MBBI->getOpcode();
492875ed548SDimitry Andric 
493875ed548SDimitry Andric     if (MBBI->readsRegister(Base)) {
494875ed548SDimitry Andric       int Offset;
495875ed548SDimitry Andric       bool IsLoad =
496875ed548SDimitry Andric         Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
497875ed548SDimitry Andric       bool IsStore =
498875ed548SDimitry Andric         Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
499875ed548SDimitry Andric 
500875ed548SDimitry Andric       if (IsLoad || IsStore) {
501875ed548SDimitry Andric         // Loads and stores with immediate offsets can be updated, but only if
502875ed548SDimitry Andric         // the new offset isn't negative.
503875ed548SDimitry Andric         // The MachineOperand containing the offset immediate is the last one
504875ed548SDimitry Andric         // before predicates.
505875ed548SDimitry Andric         MachineOperand &MO =
506875ed548SDimitry Andric           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
507875ed548SDimitry Andric         // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
508875ed548SDimitry Andric         Offset = MO.getImm() - WordOffset * getImmScale(Opc);
509875ed548SDimitry Andric 
510875ed548SDimitry Andric         // If storing the base register, it needs to be reset first.
511875ed548SDimitry Andric         unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
512875ed548SDimitry Andric 
513875ed548SDimitry Andric         if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
514875ed548SDimitry Andric           MO.setImm(Offset);
515875ed548SDimitry Andric         else
516875ed548SDimitry Andric           InsertSub = true;
517875ed548SDimitry Andric       } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
5183ca95b02SDimitry Andric                  !definesCPSR(*MBBI)) {
519875ed548SDimitry Andric         // SUBS/ADDS using this register, with a dead def of the CPSR.
520875ed548SDimitry Andric         // Merge it with the update; if the merged offset is too large,
521875ed548SDimitry Andric         // insert a new sub instead.
522875ed548SDimitry Andric         MachineOperand &MO =
523875ed548SDimitry Andric           MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
524875ed548SDimitry Andric         Offset = (Opc == ARM::tSUBi8) ?
525875ed548SDimitry Andric           MO.getImm() + WordOffset * 4 :
526875ed548SDimitry Andric           MO.getImm() - WordOffset * 4 ;
527875ed548SDimitry Andric         if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
528875ed548SDimitry Andric           // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
529875ed548SDimitry Andric           // Offset == 0.
530875ed548SDimitry Andric           MO.setImm(Offset);
531875ed548SDimitry Andric           // The base register has now been reset, so exit early.
532875ed548SDimitry Andric           return;
533875ed548SDimitry Andric         } else {
534875ed548SDimitry Andric           InsertSub = true;
535875ed548SDimitry Andric         }
536875ed548SDimitry Andric       } else {
537875ed548SDimitry Andric         // Can't update the instruction.
538875ed548SDimitry Andric         InsertSub = true;
539875ed548SDimitry Andric       }
5403ca95b02SDimitry Andric     } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
541875ed548SDimitry Andric       // Since SUBS sets the condition flags, we can't place the base reset
542875ed548SDimitry Andric       // after an instruction that has a live CPSR def.
543875ed548SDimitry Andric       // The base register might also contain an argument for a function call.
544875ed548SDimitry Andric       InsertSub = true;
545875ed548SDimitry Andric     }
546875ed548SDimitry Andric 
547875ed548SDimitry Andric     if (InsertSub) {
548875ed548SDimitry Andric       // An instruction above couldn't be updated, so insert a sub.
5497a7e6055SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
5507a7e6055SDimitry Andric           .add(t1CondCodeOp(true))
5517a7e6055SDimitry Andric           .addReg(Base)
5527a7e6055SDimitry Andric           .addImm(WordOffset * 4)
5537a7e6055SDimitry Andric           .addImm(Pred)
5547a7e6055SDimitry Andric           .addReg(PredReg);
555875ed548SDimitry Andric       return;
556875ed548SDimitry Andric     }
557875ed548SDimitry Andric 
558875ed548SDimitry Andric     if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
559875ed548SDimitry Andric       // Register got killed. Stop updating.
560875ed548SDimitry Andric       return;
561875ed548SDimitry Andric   }
562875ed548SDimitry Andric 
563875ed548SDimitry Andric   // End of block was reached.
564875ed548SDimitry Andric   if (MBB.succ_size() > 0) {
565875ed548SDimitry Andric     // FIXME: Because of a bug, live registers are sometimes missing from
566875ed548SDimitry Andric     // the successor blocks' live-in sets. This means we can't trust that
567875ed548SDimitry Andric     // information and *always* have to reset at the end of a block.
568875ed548SDimitry Andric     // See PR21029.
569875ed548SDimitry Andric     if (MBBI != MBB.end()) --MBBI;
5707a7e6055SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
5717a7e6055SDimitry Andric         .add(t1CondCodeOp(true))
5727a7e6055SDimitry Andric         .addReg(Base)
5737a7e6055SDimitry Andric         .addImm(WordOffset * 4)
5747a7e6055SDimitry Andric         .addImm(Pred)
5757a7e6055SDimitry Andric         .addReg(PredReg);
576875ed548SDimitry Andric   }
577875ed548SDimitry Andric }
578875ed548SDimitry Andric 
579875ed548SDimitry Andric /// Return the first register of class \p RegClass that is not in \p Regs.
findFreeReg(const TargetRegisterClass & RegClass)580875ed548SDimitry Andric unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
581875ed548SDimitry Andric   if (!RegClassInfoValid) {
582875ed548SDimitry Andric     RegClassInfo.runOnMachineFunction(*MF);
583875ed548SDimitry Andric     RegClassInfoValid = true;
584875ed548SDimitry Andric   }
585875ed548SDimitry Andric 
586875ed548SDimitry Andric   for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
587875ed548SDimitry Andric     if (!LiveRegs.contains(Reg))
588875ed548SDimitry Andric       return Reg;
589875ed548SDimitry Andric   return 0;
590875ed548SDimitry Andric }
591875ed548SDimitry Andric 
592875ed548SDimitry Andric /// Compute live registers just before instruction \p Before (in normal schedule
593875ed548SDimitry Andric /// direction). Computes backwards so multiple queries in the same block must
594875ed548SDimitry Andric /// come in reverse order.
moveLiveRegsBefore(const MachineBasicBlock & MBB,MachineBasicBlock::const_iterator Before)595875ed548SDimitry Andric void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
596875ed548SDimitry Andric     MachineBasicBlock::const_iterator Before) {
597875ed548SDimitry Andric   // Initialize if we never queried in this block.
598875ed548SDimitry Andric   if (!LiveRegsValid) {
599d88c1a5aSDimitry Andric     LiveRegs.init(*TRI);
6003ca95b02SDimitry Andric     LiveRegs.addLiveOuts(MBB);
601875ed548SDimitry Andric     LiveRegPos = MBB.end();
602875ed548SDimitry Andric     LiveRegsValid = true;
603875ed548SDimitry Andric   }
604875ed548SDimitry Andric   // Move backward just before the "Before" position.
605875ed548SDimitry Andric   while (LiveRegPos != Before) {
606875ed548SDimitry Andric     --LiveRegPos;
607875ed548SDimitry Andric     LiveRegs.stepBackward(*LiveRegPos);
608875ed548SDimitry Andric   }
609875ed548SDimitry Andric }
610875ed548SDimitry Andric 
ContainsReg(const ArrayRef<std::pair<unsigned,bool>> & Regs,unsigned Reg)611875ed548SDimitry Andric static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
612875ed548SDimitry Andric                         unsigned Reg) {
613875ed548SDimitry Andric   for (const std::pair<unsigned, bool> &R : Regs)
614875ed548SDimitry Andric     if (R.first == Reg)
615875ed548SDimitry Andric       return true;
616875ed548SDimitry Andric   return false;
617875ed548SDimitry Andric }
618875ed548SDimitry Andric 
619875ed548SDimitry Andric /// Create and insert a LDM or STM with Base as base register and registers in
620875ed548SDimitry Andric /// Regs as the register operands that would be loaded / stored.  It returns
621875ed548SDimitry Andric /// true if the transformation is done.
CreateLoadStoreMulti(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,int Offset,unsigned Base,bool BaseKill,unsigned Opcode,ARMCC::CondCodes Pred,unsigned PredReg,const DebugLoc & DL,ArrayRef<std::pair<unsigned,bool>> Regs)6223ca95b02SDimitry Andric MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
6233ca95b02SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
6243ca95b02SDimitry Andric     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
6253ca95b02SDimitry Andric     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
6263ca95b02SDimitry Andric     ArrayRef<std::pair<unsigned, bool>> Regs) {
627875ed548SDimitry Andric   unsigned NumRegs = Regs.size();
628875ed548SDimitry Andric   assert(NumRegs > 1);
629875ed548SDimitry Andric 
630875ed548SDimitry Andric   // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
631875ed548SDimitry Andric   // Compute liveness information for that register to make the decision.
632875ed548SDimitry Andric   bool SafeToClobberCPSR = !isThumb1 ||
633875ed548SDimitry Andric     (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
634875ed548SDimitry Andric      MachineBasicBlock::LQR_Dead);
635875ed548SDimitry Andric 
636875ed548SDimitry Andric   bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
637875ed548SDimitry Andric 
638875ed548SDimitry Andric   // Exception: If the base register is in the input reglist, Thumb1 LDM is
639875ed548SDimitry Andric   // non-writeback.
640875ed548SDimitry Andric   // It's also not possible to merge an STR of the base register in Thumb1.
6417a7e6055SDimitry Andric   if (isThumb1 && ContainsReg(Regs, Base)) {
642875ed548SDimitry Andric     assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
6437a7e6055SDimitry Andric     if (Opcode == ARM::tLDRi)
644875ed548SDimitry Andric       Writeback = false;
6457a7e6055SDimitry Andric     else if (Opcode == ARM::tSTRi)
646875ed548SDimitry Andric       return nullptr;
647875ed548SDimitry Andric   }
648875ed548SDimitry Andric 
649875ed548SDimitry Andric   ARM_AM::AMSubMode Mode = ARM_AM::ia;
650875ed548SDimitry Andric   // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
651875ed548SDimitry Andric   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
652875ed548SDimitry Andric   bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
653875ed548SDimitry Andric 
654875ed548SDimitry Andric   if (Offset == 4 && haveIBAndDA) {
655875ed548SDimitry Andric     Mode = ARM_AM::ib;
656875ed548SDimitry Andric   } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
657875ed548SDimitry Andric     Mode = ARM_AM::da;
658875ed548SDimitry Andric   } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
659875ed548SDimitry Andric     // VLDM/VSTM do not support DB mode without also updating the base reg.
660875ed548SDimitry Andric     Mode = ARM_AM::db;
661875ed548SDimitry Andric   } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
662875ed548SDimitry Andric     // Check if this is a supported opcode before inserting instructions to
663875ed548SDimitry Andric     // calculate a new base register.
664875ed548SDimitry Andric     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
665875ed548SDimitry Andric 
666875ed548SDimitry Andric     // If starting offset isn't zero, insert a MI to materialize a new base.
667875ed548SDimitry Andric     // But only do so if it is cost effective, i.e. merging more than two
668875ed548SDimitry Andric     // loads / stores.
669875ed548SDimitry Andric     if (NumRegs <= 2)
670875ed548SDimitry Andric       return nullptr;
671875ed548SDimitry Andric 
672875ed548SDimitry Andric     // On Thumb1, it's not worth materializing a new base register without
673875ed548SDimitry Andric     // clobbering the CPSR (i.e. not using ADDS/SUBS).
674875ed548SDimitry Andric     if (!SafeToClobberCPSR)
675875ed548SDimitry Andric       return nullptr;
676875ed548SDimitry Andric 
677875ed548SDimitry Andric     unsigned NewBase;
678875ed548SDimitry Andric     if (isi32Load(Opcode)) {
6797d523365SDimitry Andric       // If it is a load, then just use one of the destination registers
6807d523365SDimitry Andric       // as the new base. Will no longer be writeback in Thumb1.
681875ed548SDimitry Andric       NewBase = Regs[NumRegs-1].first;
6827d523365SDimitry Andric       Writeback = false;
683875ed548SDimitry Andric     } else {
684875ed548SDimitry Andric       // Find a free register that we can use as scratch register.
685875ed548SDimitry Andric       moveLiveRegsBefore(MBB, InsertBefore);
686875ed548SDimitry Andric       // The merged instruction does not exist yet but will use several Regs if
687875ed548SDimitry Andric       // it is a Store.
688875ed548SDimitry Andric       if (!isLoadSingle(Opcode))
689875ed548SDimitry Andric         for (const std::pair<unsigned, bool> &R : Regs)
690875ed548SDimitry Andric           LiveRegs.addReg(R.first);
691875ed548SDimitry Andric 
692875ed548SDimitry Andric       NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
693875ed548SDimitry Andric       if (NewBase == 0)
694875ed548SDimitry Andric         return nullptr;
695875ed548SDimitry Andric     }
696875ed548SDimitry Andric 
697875ed548SDimitry Andric     int BaseOpc =
698875ed548SDimitry Andric       isThumb2 ? ARM::t2ADDri :
699875ed548SDimitry Andric       (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
700875ed548SDimitry Andric       (isThumb1 && Offset < 8) ? ARM::tADDi3 :
701875ed548SDimitry Andric       isThumb1 ? ARM::tADDi8  : ARM::ADDri;
702875ed548SDimitry Andric 
703875ed548SDimitry Andric     if (Offset < 0) {
704875ed548SDimitry Andric       Offset = - Offset;
705875ed548SDimitry Andric       BaseOpc =
706875ed548SDimitry Andric         isThumb2 ? ARM::t2SUBri :
707875ed548SDimitry Andric         (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
708875ed548SDimitry Andric         isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
709875ed548SDimitry Andric     }
710875ed548SDimitry Andric 
711875ed548SDimitry Andric     if (!TL->isLegalAddImmediate(Offset))
712875ed548SDimitry Andric       // FIXME: Try add with register operand?
713875ed548SDimitry Andric       return nullptr; // Probably not worth it then.
714875ed548SDimitry Andric 
715875ed548SDimitry Andric     // We can only append a kill flag to the add/sub input if the value is not
716875ed548SDimitry Andric     // used in the register list of the stm as well.
717875ed548SDimitry Andric     bool KillOldBase = BaseKill &&
718875ed548SDimitry Andric       (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
719875ed548SDimitry Andric 
720875ed548SDimitry Andric     if (isThumb1) {
721875ed548SDimitry Andric       // Thumb1: depending on immediate size, use either
722875ed548SDimitry Andric       //   ADDS NewBase, Base, #imm3
723875ed548SDimitry Andric       // or
724875ed548SDimitry Andric       //   MOV  NewBase, Base
725875ed548SDimitry Andric       //   ADDS NewBase, #imm8.
726875ed548SDimitry Andric       if (Base != NewBase &&
727875ed548SDimitry Andric           (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
728875ed548SDimitry Andric         // Need to insert a MOV to the new base first.
729875ed548SDimitry Andric         if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
730875ed548SDimitry Andric             !STI->hasV6Ops()) {
731875ed548SDimitry Andric           // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
732875ed548SDimitry Andric           if (Pred != ARMCC::AL)
733875ed548SDimitry Andric             return nullptr;
734875ed548SDimitry Andric           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
735875ed548SDimitry Andric             .addReg(Base, getKillRegState(KillOldBase));
736875ed548SDimitry Andric         } else
737875ed548SDimitry Andric           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
738875ed548SDimitry Andric               .addReg(Base, getKillRegState(KillOldBase))
7397a7e6055SDimitry Andric               .add(predOps(Pred, PredReg));
740875ed548SDimitry Andric 
741875ed548SDimitry Andric         // The following ADDS/SUBS becomes an update.
742875ed548SDimitry Andric         Base = NewBase;
743875ed548SDimitry Andric         KillOldBase = true;
744875ed548SDimitry Andric       }
745875ed548SDimitry Andric       if (BaseOpc == ARM::tADDrSPi) {
746875ed548SDimitry Andric         assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
747875ed548SDimitry Andric         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
7487a7e6055SDimitry Andric             .addReg(Base, getKillRegState(KillOldBase))
7497a7e6055SDimitry Andric             .addImm(Offset / 4)
7507a7e6055SDimitry Andric             .add(predOps(Pred, PredReg));
751875ed548SDimitry Andric       } else
7527a7e6055SDimitry Andric         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
7537a7e6055SDimitry Andric             .add(t1CondCodeOp(true))
7547a7e6055SDimitry Andric             .addReg(Base, getKillRegState(KillOldBase))
7557a7e6055SDimitry Andric             .addImm(Offset)
7567a7e6055SDimitry Andric             .add(predOps(Pred, PredReg));
757875ed548SDimitry Andric     } else {
758875ed548SDimitry Andric       BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
7597a7e6055SDimitry Andric           .addReg(Base, getKillRegState(KillOldBase))
7607a7e6055SDimitry Andric           .addImm(Offset)
7617a7e6055SDimitry Andric           .add(predOps(Pred, PredReg))
7627a7e6055SDimitry Andric           .add(condCodeOp());
763875ed548SDimitry Andric     }
764875ed548SDimitry Andric     Base = NewBase;
765875ed548SDimitry Andric     BaseKill = true; // New base is always killed straight away.
766875ed548SDimitry Andric   }
767875ed548SDimitry Andric 
768875ed548SDimitry Andric   bool isDef = isLoadSingle(Opcode);
769875ed548SDimitry Andric 
770875ed548SDimitry Andric   // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
771875ed548SDimitry Andric   // base register writeback.
772875ed548SDimitry Andric   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
773875ed548SDimitry Andric   if (!Opcode)
774875ed548SDimitry Andric     return nullptr;
775875ed548SDimitry Andric 
776875ed548SDimitry Andric   // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
777875ed548SDimitry Andric   // - There is no writeback (LDM of base register),
778875ed548SDimitry Andric   // - the base register is killed by the merged instruction,
779875ed548SDimitry Andric   // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
780875ed548SDimitry Andric   //   to reset the base register.
781875ed548SDimitry Andric   // Otherwise, don't merge.
782875ed548SDimitry Andric   // It's safe to return here since the code to materialize a new base register
783875ed548SDimitry Andric   // above is also conditional on SafeToClobberCPSR.
784875ed548SDimitry Andric   if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
785875ed548SDimitry Andric     return nullptr;
786875ed548SDimitry Andric 
787875ed548SDimitry Andric   MachineInstrBuilder MIB;
788875ed548SDimitry Andric 
789875ed548SDimitry Andric   if (Writeback) {
7907d523365SDimitry Andric     assert(isThumb1 && "expected Writeback only inThumb1");
7917d523365SDimitry Andric     if (Opcode == ARM::tLDMIA) {
7927d523365SDimitry Andric       assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
793875ed548SDimitry Andric       // Update tLDMIA with writeback if necessary.
794875ed548SDimitry Andric       Opcode = ARM::tLDMIA_UPD;
7957d523365SDimitry Andric     }
796875ed548SDimitry Andric 
797875ed548SDimitry Andric     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
798875ed548SDimitry Andric 
799875ed548SDimitry Andric     // Thumb1: we might need to set base writeback when building the MI.
800875ed548SDimitry Andric     MIB.addReg(Base, getDefRegState(true))
801875ed548SDimitry Andric        .addReg(Base, getKillRegState(BaseKill));
802875ed548SDimitry Andric 
803875ed548SDimitry Andric     // The base isn't dead after a merged instruction with writeback.
804875ed548SDimitry Andric     // Insert a sub instruction after the newly formed instruction to reset.
805875ed548SDimitry Andric     if (!BaseKill)
806875ed548SDimitry Andric       UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
807875ed548SDimitry Andric   } else {
808875ed548SDimitry Andric     // No writeback, simply build the MachineInstr.
809875ed548SDimitry Andric     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
810875ed548SDimitry Andric     MIB.addReg(Base, getKillRegState(BaseKill));
811875ed548SDimitry Andric   }
812875ed548SDimitry Andric 
813875ed548SDimitry Andric   MIB.addImm(Pred).addReg(PredReg);
814875ed548SDimitry Andric 
815875ed548SDimitry Andric   for (const std::pair<unsigned, bool> &R : Regs)
816875ed548SDimitry Andric     MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
817875ed548SDimitry Andric 
818875ed548SDimitry Andric   return MIB.getInstr();
819875ed548SDimitry Andric }
820875ed548SDimitry Andric 
CreateLoadStoreDouble(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,int Offset,unsigned Base,bool BaseKill,unsigned Opcode,ARMCC::CondCodes Pred,unsigned PredReg,const DebugLoc & DL,ArrayRef<std::pair<unsigned,bool>> Regs) const8213ca95b02SDimitry Andric MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
8223ca95b02SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
8233ca95b02SDimitry Andric     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
8243ca95b02SDimitry Andric     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
8253ca95b02SDimitry Andric     ArrayRef<std::pair<unsigned, bool>> Regs) const {
826875ed548SDimitry Andric   bool IsLoad = isi32Load(Opcode);
827875ed548SDimitry Andric   assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
828875ed548SDimitry Andric   unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
829875ed548SDimitry Andric 
830875ed548SDimitry Andric   assert(Regs.size() == 2);
831875ed548SDimitry Andric   MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
832875ed548SDimitry Andric                                     TII->get(LoadStoreOpcode));
833875ed548SDimitry Andric   if (IsLoad) {
834875ed548SDimitry Andric     MIB.addReg(Regs[0].first, RegState::Define)
835875ed548SDimitry Andric        .addReg(Regs[1].first, RegState::Define);
836875ed548SDimitry Andric   } else {
837875ed548SDimitry Andric     MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
838875ed548SDimitry Andric        .addReg(Regs[1].first, getKillRegState(Regs[1].second));
839875ed548SDimitry Andric   }
840875ed548SDimitry Andric   MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
841875ed548SDimitry Andric   return MIB.getInstr();
842875ed548SDimitry Andric }
843875ed548SDimitry Andric 
844875ed548SDimitry Andric /// Call MergeOps and update MemOps and merges accordingly on success.
MergeOpsUpdate(const MergeCandidate & Cand)845875ed548SDimitry Andric MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
846875ed548SDimitry Andric   const MachineInstr *First = Cand.Instrs.front();
847875ed548SDimitry Andric   unsigned Opcode = First->getOpcode();
848875ed548SDimitry Andric   bool IsLoad = isLoadSingle(Opcode);
849875ed548SDimitry Andric   SmallVector<std::pair<unsigned, bool>, 8> Regs;
850875ed548SDimitry Andric   SmallVector<unsigned, 4> ImpDefs;
851875ed548SDimitry Andric   DenseSet<unsigned> KilledRegs;
8527d523365SDimitry Andric   DenseSet<unsigned> UsedRegs;
853875ed548SDimitry Andric   // Determine list of registers and list of implicit super-register defs.
854875ed548SDimitry Andric   for (const MachineInstr *MI : Cand.Instrs) {
855875ed548SDimitry Andric     const MachineOperand &MO = getLoadStoreRegOp(*MI);
856875ed548SDimitry Andric     unsigned Reg = MO.getReg();
857875ed548SDimitry Andric     bool IsKill = MO.isKill();
858875ed548SDimitry Andric     if (IsKill)
859875ed548SDimitry Andric       KilledRegs.insert(Reg);
860875ed548SDimitry Andric     Regs.push_back(std::make_pair(Reg, IsKill));
8617d523365SDimitry Andric     UsedRegs.insert(Reg);
862875ed548SDimitry Andric 
863875ed548SDimitry Andric     if (IsLoad) {
864875ed548SDimitry Andric       // Collect any implicit defs of super-registers, after merging we can't
865875ed548SDimitry Andric       // be sure anymore that we properly preserved these live ranges and must
866875ed548SDimitry Andric       // removed these implicit operands.
867875ed548SDimitry Andric       for (const MachineOperand &MO : MI->implicit_operands()) {
868875ed548SDimitry Andric         if (!MO.isReg() || !MO.isDef() || MO.isDead())
869875ed548SDimitry Andric           continue;
870875ed548SDimitry Andric         assert(MO.isImplicit());
871875ed548SDimitry Andric         unsigned DefReg = MO.getReg();
872875ed548SDimitry Andric 
873d88c1a5aSDimitry Andric         if (is_contained(ImpDefs, DefReg))
874875ed548SDimitry Andric           continue;
875875ed548SDimitry Andric         // We can ignore cases where the super-reg is read and written.
876875ed548SDimitry Andric         if (MI->readsRegister(DefReg))
877875ed548SDimitry Andric           continue;
878875ed548SDimitry Andric         ImpDefs.push_back(DefReg);
879875ed548SDimitry Andric       }
880875ed548SDimitry Andric     }
881875ed548SDimitry Andric   }
882875ed548SDimitry Andric 
883875ed548SDimitry Andric   // Attempt the merge.
8842cab237bSDimitry Andric   using iterator = MachineBasicBlock::iterator;
8852cab237bSDimitry Andric 
886875ed548SDimitry Andric   MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
887875ed548SDimitry Andric   iterator InsertBefore = std::next(iterator(LatestMI));
888875ed548SDimitry Andric   MachineBasicBlock &MBB = *LatestMI->getParent();
8893ca95b02SDimitry Andric   unsigned Offset = getMemoryOpOffset(*First);
890875ed548SDimitry Andric   unsigned Base = getLoadStoreBaseOp(*First).getReg();
891875ed548SDimitry Andric   bool BaseKill = LatestMI->killsRegister(Base);
892875ed548SDimitry Andric   unsigned PredReg = 0;
8933ca95b02SDimitry Andric   ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
894875ed548SDimitry Andric   DebugLoc DL = First->getDebugLoc();
895875ed548SDimitry Andric   MachineInstr *Merged = nullptr;
896875ed548SDimitry Andric   if (Cand.CanMergeToLSDouble)
897875ed548SDimitry Andric     Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
898875ed548SDimitry Andric                                    Opcode, Pred, PredReg, DL, Regs);
899875ed548SDimitry Andric   if (!Merged && Cand.CanMergeToLSMulti)
900875ed548SDimitry Andric     Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
901875ed548SDimitry Andric                                   Opcode, Pred, PredReg, DL, Regs);
902875ed548SDimitry Andric   if (!Merged)
903875ed548SDimitry Andric     return nullptr;
904875ed548SDimitry Andric 
905875ed548SDimitry Andric   // Determine earliest instruction that will get removed. We then keep an
906875ed548SDimitry Andric   // iterator just above it so the following erases don't invalidated it.
907875ed548SDimitry Andric   iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
908875ed548SDimitry Andric   bool EarliestAtBegin = false;
909875ed548SDimitry Andric   if (EarliestI == MBB.begin()) {
910875ed548SDimitry Andric     EarliestAtBegin = true;
911875ed548SDimitry Andric   } else {
912875ed548SDimitry Andric     EarliestI = std::prev(EarliestI);
913875ed548SDimitry Andric   }
914875ed548SDimitry Andric 
915875ed548SDimitry Andric   // Remove instructions which have been merged.
916875ed548SDimitry Andric   for (MachineInstr *MI : Cand.Instrs)
917875ed548SDimitry Andric     MBB.erase(MI);
918875ed548SDimitry Andric 
919875ed548SDimitry Andric   // Determine range between the earliest removed instruction and the new one.
920875ed548SDimitry Andric   if (EarliestAtBegin)
921875ed548SDimitry Andric     EarliestI = MBB.begin();
922875ed548SDimitry Andric   else
923875ed548SDimitry Andric     EarliestI = std::next(EarliestI);
924875ed548SDimitry Andric   auto FixupRange = make_range(EarliestI, iterator(Merged));
925875ed548SDimitry Andric 
926875ed548SDimitry Andric   if (isLoadSingle(Opcode)) {
927875ed548SDimitry Andric     // If the previous loads defined a super-reg, then we have to mark earlier
928875ed548SDimitry Andric     // operands undef; Replicate the super-reg def on the merged instruction.
929875ed548SDimitry Andric     for (MachineInstr &MI : FixupRange) {
930875ed548SDimitry Andric       for (unsigned &ImpDefReg : ImpDefs) {
931875ed548SDimitry Andric         for (MachineOperand &MO : MI.implicit_operands()) {
932875ed548SDimitry Andric           if (!MO.isReg() || MO.getReg() != ImpDefReg)
933875ed548SDimitry Andric             continue;
934875ed548SDimitry Andric           if (MO.readsReg())
935875ed548SDimitry Andric             MO.setIsUndef();
936875ed548SDimitry Andric           else if (MO.isDef())
937875ed548SDimitry Andric             ImpDefReg = 0;
938875ed548SDimitry Andric         }
939875ed548SDimitry Andric       }
940875ed548SDimitry Andric     }
941875ed548SDimitry Andric 
942875ed548SDimitry Andric     MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
943875ed548SDimitry Andric     for (unsigned ImpDef : ImpDefs)
944875ed548SDimitry Andric       MIB.addReg(ImpDef, RegState::ImplicitDefine);
945875ed548SDimitry Andric   } else {
946875ed548SDimitry Andric     // Remove kill flags: We are possibly storing the values later now.
947875ed548SDimitry Andric     assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
948875ed548SDimitry Andric     for (MachineInstr &MI : FixupRange) {
949875ed548SDimitry Andric       for (MachineOperand &MO : MI.uses()) {
950875ed548SDimitry Andric         if (!MO.isReg() || !MO.isKill())
951875ed548SDimitry Andric           continue;
9527d523365SDimitry Andric         if (UsedRegs.count(MO.getReg()))
953875ed548SDimitry Andric           MO.setIsKill(false);
954875ed548SDimitry Andric       }
955875ed548SDimitry Andric     }
956875ed548SDimitry Andric     assert(ImpDefs.empty());
957875ed548SDimitry Andric   }
958875ed548SDimitry Andric 
959875ed548SDimitry Andric   return Merged;
960875ed548SDimitry Andric }
961875ed548SDimitry Andric 
/// Return true if \p Offset can be encoded as the immediate of a
/// t2LDRDi8/t2STRDi8: its magnitude must be a multiple of 4 and below 1024.
static bool isValidLSDoubleOffset(int Offset) {
  // Compute the magnitude in unsigned arithmetic. abs(INT_MIN) is undefined,
  // whereas negating the unsigned value is well defined for every input.
  const unsigned Value = Offset < 0 ? 0u - static_cast<unsigned>(Offset)
                                    : static_cast<unsigned>(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
968875ed548SDimitry Andric 
9693ca95b02SDimitry Andric /// Return true for loads/stores that can be combined to a double/multi
9703ca95b02SDimitry Andric /// operation without increasing the requirements for alignment.
mayCombineMisaligned(const TargetSubtargetInfo & STI,const MachineInstr & MI)9713ca95b02SDimitry Andric static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
9723ca95b02SDimitry Andric                                  const MachineInstr &MI) {
9733ca95b02SDimitry Andric   // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
9743ca95b02SDimitry Andric   // difference.
9753ca95b02SDimitry Andric   unsigned Opcode = MI.getOpcode();
9763ca95b02SDimitry Andric   if (!isi32Load(Opcode) && !isi32Store(Opcode))
9773ca95b02SDimitry Andric     return true;
9783ca95b02SDimitry Andric 
9793ca95b02SDimitry Andric   // Stack pointer alignment is out of the programmers control so we can trust
9803ca95b02SDimitry Andric   // SP-relative loads/stores.
9813ca95b02SDimitry Andric   if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
9823ca95b02SDimitry Andric       STI.getFrameLowering()->getTransientStackAlignment() >= 4)
9833ca95b02SDimitry Andric     return true;
9843ca95b02SDimitry Andric   return false;
9853ca95b02SDimitry Andric }
9863ca95b02SDimitry Andric 
987875ed548SDimitry Andric /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
FormCandidates(const MemOpQueue & MemOps)988875ed548SDimitry Andric void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
989875ed548SDimitry Andric   const MachineInstr *FirstMI = MemOps[0].MI;
990875ed548SDimitry Andric   unsigned Opcode = FirstMI->getOpcode();
991875ed548SDimitry Andric   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
992875ed548SDimitry Andric   unsigned Size = getLSMultipleTransferSize(FirstMI);
993875ed548SDimitry Andric 
994875ed548SDimitry Andric   unsigned SIndex = 0;
995875ed548SDimitry Andric   unsigned EIndex = MemOps.size();
996875ed548SDimitry Andric   do {
997875ed548SDimitry Andric     // Look at the first instruction.
998875ed548SDimitry Andric     const MachineInstr *MI = MemOps[SIndex].MI;
999875ed548SDimitry Andric     int Offset = MemOps[SIndex].Offset;
1000875ed548SDimitry Andric     const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1001875ed548SDimitry Andric     unsigned PReg = PMO.getReg();
10022cab237bSDimitry Andric     unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
10032cab237bSDimitry Andric                                      : TRI->getEncodingValue(PReg);
1004875ed548SDimitry Andric     unsigned Latest = SIndex;
1005875ed548SDimitry Andric     unsigned Earliest = SIndex;
1006875ed548SDimitry Andric     unsigned Count = 1;
1007875ed548SDimitry Andric     bool CanMergeToLSDouble =
1008875ed548SDimitry Andric       STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1009875ed548SDimitry Andric     // ARM errata 602117: LDRD with base in list may result in incorrect base
1010875ed548SDimitry Andric     // register when interrupted or faulted.
1011875ed548SDimitry Andric     if (STI->isCortexM3() && isi32Load(Opcode) &&
1012875ed548SDimitry Andric         PReg == getLoadStoreBaseOp(*MI).getReg())
1013875ed548SDimitry Andric       CanMergeToLSDouble = false;
1014875ed548SDimitry Andric 
1015875ed548SDimitry Andric     bool CanMergeToLSMulti = true;
1016875ed548SDimitry Andric     // On swift vldm/vstm starting with an odd register number as that needs
1017875ed548SDimitry Andric     // more uops than single vldrs.
10183ca95b02SDimitry Andric     if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1019875ed548SDimitry Andric       CanMergeToLSMulti = false;
1020875ed548SDimitry Andric 
1021b6c25e0eSDimitry Andric     // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1022b6c25e0eSDimitry Andric     // deprecated; LDM to PC is fine but cannot happen here.
1023b6c25e0eSDimitry Andric     if (PReg == ARM::SP || PReg == ARM::PC)
1024b6c25e0eSDimitry Andric       CanMergeToLSMulti = CanMergeToLSDouble = false;
1025b6c25e0eSDimitry Andric 
10263ca95b02SDimitry Andric     // Should we be conservative?
10273ca95b02SDimitry Andric     if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
10283ca95b02SDimitry Andric       CanMergeToLSMulti = CanMergeToLSDouble = false;
10293ca95b02SDimitry Andric 
1030*b5893f02SDimitry Andric     // vldm / vstm limit are 32 for S variants, 16 for D variants.
1031*b5893f02SDimitry Andric     unsigned Limit;
1032*b5893f02SDimitry Andric     switch (Opcode) {
1033*b5893f02SDimitry Andric     default:
1034*b5893f02SDimitry Andric       Limit = UINT_MAX;
1035*b5893f02SDimitry Andric       break;
1036*b5893f02SDimitry Andric     case ARM::VLDRD:
1037*b5893f02SDimitry Andric     case ARM::VSTRD:
1038*b5893f02SDimitry Andric       Limit = 16;
1039*b5893f02SDimitry Andric       break;
1040*b5893f02SDimitry Andric     }
1041*b5893f02SDimitry Andric 
1042875ed548SDimitry Andric     // Merge following instructions where possible.
1043875ed548SDimitry Andric     for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1044875ed548SDimitry Andric       int NewOffset = MemOps[I].Offset;
1045875ed548SDimitry Andric       if (NewOffset != Offset + (int)Size)
1046875ed548SDimitry Andric         break;
1047875ed548SDimitry Andric       const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1048875ed548SDimitry Andric       unsigned Reg = MO.getReg();
1049b6c25e0eSDimitry Andric       if (Reg == ARM::SP || Reg == ARM::PC)
1050b6c25e0eSDimitry Andric         break;
1051*b5893f02SDimitry Andric       if (Count == Limit)
1052*b5893f02SDimitry Andric         break;
1053875ed548SDimitry Andric 
1054875ed548SDimitry Andric       // See if the current load/store may be part of a multi load/store.
10552cab237bSDimitry Andric       unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
10562cab237bSDimitry Andric                                      : TRI->getEncodingValue(Reg);
1057875ed548SDimitry Andric       bool PartOfLSMulti = CanMergeToLSMulti;
1058875ed548SDimitry Andric       if (PartOfLSMulti) {
1059875ed548SDimitry Andric         // Register numbers must be in ascending order.
1060b6c25e0eSDimitry Andric         if (RegNum <= PRegNum)
1061875ed548SDimitry Andric           PartOfLSMulti = false;
1062875ed548SDimitry Andric         // For VFP / NEON load/store multiples, the registers must be
1063875ed548SDimitry Andric         // consecutive and within the limit on the number of registers per
1064875ed548SDimitry Andric         // instruction.
1065875ed548SDimitry Andric         else if (!isNotVFP && RegNum != PRegNum+1)
1066875ed548SDimitry Andric           PartOfLSMulti = false;
1067875ed548SDimitry Andric       }
1068875ed548SDimitry Andric       // See if the current load/store may be part of a double load/store.
1069875ed548SDimitry Andric       bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1070875ed548SDimitry Andric 
1071875ed548SDimitry Andric       if (!PartOfLSMulti && !PartOfLSDouble)
1072875ed548SDimitry Andric         break;
1073875ed548SDimitry Andric       CanMergeToLSMulti &= PartOfLSMulti;
1074875ed548SDimitry Andric       CanMergeToLSDouble &= PartOfLSDouble;
1075875ed548SDimitry Andric       // Track MemOp with latest and earliest position (Positions are
1076875ed548SDimitry Andric       // counted in reverse).
1077875ed548SDimitry Andric       unsigned Position = MemOps[I].Position;
1078875ed548SDimitry Andric       if (Position < MemOps[Latest].Position)
1079875ed548SDimitry Andric         Latest = I;
1080875ed548SDimitry Andric       else if (Position > MemOps[Earliest].Position)
1081875ed548SDimitry Andric         Earliest = I;
1082875ed548SDimitry Andric       // Prepare for next MemOp.
1083875ed548SDimitry Andric       Offset += Size;
1084875ed548SDimitry Andric       PRegNum = RegNum;
1085875ed548SDimitry Andric     }
1086875ed548SDimitry Andric 
1087875ed548SDimitry Andric     // Form a candidate from the Ops collected so far.
1088875ed548SDimitry Andric     MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1089875ed548SDimitry Andric     for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1090875ed548SDimitry Andric       Candidate->Instrs.push_back(MemOps[C].MI);
1091875ed548SDimitry Andric     Candidate->LatestMIIdx = Latest - SIndex;
1092875ed548SDimitry Andric     Candidate->EarliestMIIdx = Earliest - SIndex;
1093875ed548SDimitry Andric     Candidate->InsertPos = MemOps[Latest].Position;
1094875ed548SDimitry Andric     if (Count == 1)
1095875ed548SDimitry Andric       CanMergeToLSMulti = CanMergeToLSDouble = false;
1096875ed548SDimitry Andric     Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1097875ed548SDimitry Andric     Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1098875ed548SDimitry Andric     Candidates.push_back(Candidate);
1099875ed548SDimitry Andric     // Continue after the chain.
1100875ed548SDimitry Andric     SIndex += Count;
1101875ed548SDimitry Andric   } while (SIndex < EIndex);
1102875ed548SDimitry Andric }
1103875ed548SDimitry Andric 
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)11042754fe60SDimitry Andric static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
11052754fe60SDimitry Andric                                             ARM_AM::AMSubMode Mode) {
1106f22ef01cSRoman Divacky   switch (Opc) {
1107f22ef01cSRoman Divacky   default: llvm_unreachable("Unhandled opcode!");
11082754fe60SDimitry Andric   case ARM::LDMIA:
11092754fe60SDimitry Andric   case ARM::LDMDA:
11102754fe60SDimitry Andric   case ARM::LDMDB:
11112754fe60SDimitry Andric   case ARM::LDMIB:
11122754fe60SDimitry Andric     switch (Mode) {
11132754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11142754fe60SDimitry Andric     case ARM_AM::ia: return ARM::LDMIA_UPD;
11152754fe60SDimitry Andric     case ARM_AM::ib: return ARM::LDMIB_UPD;
11162754fe60SDimitry Andric     case ARM_AM::da: return ARM::LDMDA_UPD;
11172754fe60SDimitry Andric     case ARM_AM::db: return ARM::LDMDB_UPD;
1118f22ef01cSRoman Divacky     }
11192754fe60SDimitry Andric   case ARM::STMIA:
11202754fe60SDimitry Andric   case ARM::STMDA:
11212754fe60SDimitry Andric   case ARM::STMDB:
11222754fe60SDimitry Andric   case ARM::STMIB:
11232754fe60SDimitry Andric     switch (Mode) {
11242754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11252754fe60SDimitry Andric     case ARM_AM::ia: return ARM::STMIA_UPD;
11262754fe60SDimitry Andric     case ARM_AM::ib: return ARM::STMIB_UPD;
11272754fe60SDimitry Andric     case ARM_AM::da: return ARM::STMDA_UPD;
11282754fe60SDimitry Andric     case ARM_AM::db: return ARM::STMDB_UPD;
11292754fe60SDimitry Andric     }
11302754fe60SDimitry Andric   case ARM::t2LDMIA:
11312754fe60SDimitry Andric   case ARM::t2LDMDB:
11322754fe60SDimitry Andric     switch (Mode) {
11332754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11342754fe60SDimitry Andric     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
11352754fe60SDimitry Andric     case ARM_AM::db: return ARM::t2LDMDB_UPD;
11362754fe60SDimitry Andric     }
11372754fe60SDimitry Andric   case ARM::t2STMIA:
11382754fe60SDimitry Andric   case ARM::t2STMDB:
11392754fe60SDimitry Andric     switch (Mode) {
11402754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11412754fe60SDimitry Andric     case ARM_AM::ia: return ARM::t2STMIA_UPD;
11422754fe60SDimitry Andric     case ARM_AM::db: return ARM::t2STMDB_UPD;
11432754fe60SDimitry Andric     }
11442754fe60SDimitry Andric   case ARM::VLDMSIA:
11452754fe60SDimitry Andric     switch (Mode) {
11462754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11472754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
11482754fe60SDimitry Andric     case ARM_AM::db: return ARM::VLDMSDB_UPD;
11492754fe60SDimitry Andric     }
11502754fe60SDimitry Andric   case ARM::VLDMDIA:
11512754fe60SDimitry Andric     switch (Mode) {
11522754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11532754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
11542754fe60SDimitry Andric     case ARM_AM::db: return ARM::VLDMDDB_UPD;
11552754fe60SDimitry Andric     }
11562754fe60SDimitry Andric   case ARM::VSTMSIA:
11572754fe60SDimitry Andric     switch (Mode) {
11582754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11592754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
11602754fe60SDimitry Andric     case ARM_AM::db: return ARM::VSTMSDB_UPD;
11612754fe60SDimitry Andric     }
11622754fe60SDimitry Andric   case ARM::VSTMDIA:
11632754fe60SDimitry Andric     switch (Mode) {
11642754fe60SDimitry Andric     default: llvm_unreachable("Unhandled submode!");
11652754fe60SDimitry Andric     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
11662754fe60SDimitry Andric     case ARM_AM::db: return ARM::VSTMDDB_UPD;
11672754fe60SDimitry Andric     }
11682754fe60SDimitry Andric   }
1169f22ef01cSRoman Divacky }
1170f22ef01cSRoman Divacky 
11717d523365SDimitry Andric /// Check if the given instruction increments or decrements a register and
11727d523365SDimitry Andric /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
11737d523365SDimitry Andric /// generated by the instruction are possibly read as well.
isIncrementOrDecrement(const MachineInstr & MI,unsigned Reg,ARMCC::CondCodes Pred,unsigned PredReg)11747d523365SDimitry Andric static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
11757d523365SDimitry Andric                                   ARMCC::CondCodes Pred, unsigned PredReg) {
11767d523365SDimitry Andric   bool CheckCPSRDef;
11777d523365SDimitry Andric   int Scale;
11787d523365SDimitry Andric   switch (MI.getOpcode()) {
11797d523365SDimitry Andric   case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
11807d523365SDimitry Andric   case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
11817d523365SDimitry Andric   case ARM::t2SUBri:
11827d523365SDimitry Andric   case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
11837d523365SDimitry Andric   case ARM::t2ADDri:
11847d523365SDimitry Andric   case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
11857d523365SDimitry Andric   case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
11867d523365SDimitry Andric   case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
11877d523365SDimitry Andric   default: return 0;
11887d523365SDimitry Andric   }
11897d523365SDimitry Andric 
11907d523365SDimitry Andric   unsigned MIPredReg;
11917d523365SDimitry Andric   if (MI.getOperand(0).getReg() != Reg ||
11927d523365SDimitry Andric       MI.getOperand(1).getReg() != Reg ||
11933ca95b02SDimitry Andric       getInstrPredicate(MI, MIPredReg) != Pred ||
11947d523365SDimitry Andric       MIPredReg != PredReg)
11957d523365SDimitry Andric     return 0;
11967d523365SDimitry Andric 
11973ca95b02SDimitry Andric   if (CheckCPSRDef && definesCPSR(MI))
11987d523365SDimitry Andric     return 0;
11997d523365SDimitry Andric   return MI.getOperand(2).getImm() * Scale;
12007d523365SDimitry Andric }
12017d523365SDimitry Andric 
12027d523365SDimitry Andric /// Searches for an increment or decrement of \p Reg before \p MBBI.
12037d523365SDimitry Andric static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI,unsigned Reg,ARMCC::CondCodes Pred,unsigned PredReg,int & Offset)12047d523365SDimitry Andric findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
12057d523365SDimitry Andric                  ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
12067d523365SDimitry Andric   Offset = 0;
12077d523365SDimitry Andric   MachineBasicBlock &MBB = *MBBI->getParent();
12087d523365SDimitry Andric   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
12097d523365SDimitry Andric   MachineBasicBlock::iterator EndMBBI = MBB.end();
12107d523365SDimitry Andric   if (MBBI == BeginMBBI)
12117d523365SDimitry Andric     return EndMBBI;
12127d523365SDimitry Andric 
12137d523365SDimitry Andric   // Skip debug values.
12147d523365SDimitry Andric   MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
12154ba319b5SDimitry Andric   while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
12167d523365SDimitry Andric     --PrevMBBI;
12177d523365SDimitry Andric 
12187d523365SDimitry Andric   Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
12197d523365SDimitry Andric   return Offset == 0 ? EndMBBI : PrevMBBI;
12207d523365SDimitry Andric }
12217d523365SDimitry Andric 
12227d523365SDimitry Andric /// Searches for a increment or decrement of \p Reg after \p MBBI.
12237d523365SDimitry Andric static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI,unsigned Reg,ARMCC::CondCodes Pred,unsigned PredReg,int & Offset)12247d523365SDimitry Andric findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
12257d523365SDimitry Andric                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
12267d523365SDimitry Andric   Offset = 0;
12277d523365SDimitry Andric   MachineBasicBlock &MBB = *MBBI->getParent();
12287d523365SDimitry Andric   MachineBasicBlock::iterator EndMBBI = MBB.end();
12297d523365SDimitry Andric   MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
12307d523365SDimitry Andric   // Skip debug values.
12314ba319b5SDimitry Andric   while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
12327d523365SDimitry Andric     ++NextMBBI;
12337d523365SDimitry Andric   if (NextMBBI == EndMBBI)
12347d523365SDimitry Andric     return EndMBBI;
12357d523365SDimitry Andric 
12367d523365SDimitry Andric   Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
12377d523365SDimitry Andric   return Offset == 0 ? EndMBBI : NextMBBI;
12387d523365SDimitry Andric }
12397d523365SDimitry Andric 
124097bc6c73SDimitry Andric /// Fold proceeding/trailing inc/dec of base register into the
124197bc6c73SDimitry Andric /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1242f22ef01cSRoman Divacky ///
1243f22ef01cSRoman Divacky /// stmia rn, <ra, rb, rc>
1244f22ef01cSRoman Divacky /// rn := rn + 4 * 3;
1245f22ef01cSRoman Divacky /// =>
1246f22ef01cSRoman Divacky /// stmia rn!, <ra, rb, rc>
1247f22ef01cSRoman Divacky ///
1248f22ef01cSRoman Divacky /// rn := rn - 4 * 3;
1249f22ef01cSRoman Divacky /// ldmia rn, <ra, rb, rc>
1250f22ef01cSRoman Divacky /// =>
1251f22ef01cSRoman Divacky /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineInstr * MI)1252875ed548SDimitry Andric bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
125391bc56edSDimitry Andric   // Thumb1 is already using updating loads/stores.
125491bc56edSDimitry Andric   if (isThumb1) return false;
125591bc56edSDimitry Andric 
1256875ed548SDimitry Andric   const MachineOperand &BaseOP = MI->getOperand(0);
1257875ed548SDimitry Andric   unsigned Base = BaseOP.getReg();
1258875ed548SDimitry Andric   bool BaseKill = BaseOP.isKill();
1259f22ef01cSRoman Divacky   unsigned PredReg = 0;
12603ca95b02SDimitry Andric   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1261ff0cc061SDimitry Andric   unsigned Opcode = MI->getOpcode();
1262875ed548SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
1263f22ef01cSRoman Divacky 
1264f22ef01cSRoman Divacky   // Can't use an updating ld/st if the base register is also a dest
1265f22ef01cSRoman Divacky   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
12662754fe60SDimitry Andric   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1267f22ef01cSRoman Divacky     if (MI->getOperand(i).getReg() == Base)
1268f22ef01cSRoman Divacky       return false;
12692754fe60SDimitry Andric 
12707d523365SDimitry Andric   int Bytes = getLSMultipleTransferSize(MI);
1271b6c25e0eSDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
1272b6c25e0eSDimitry Andric   MachineBasicBlock::iterator MBBI(MI);
12737d523365SDimitry Andric   int Offset;
12747d523365SDimitry Andric   MachineBasicBlock::iterator MergeInstr
12757d523365SDimitry Andric     = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
12767d523365SDimitry Andric   ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
12777d523365SDimitry Andric   if (Mode == ARM_AM::ia && Offset == -Bytes) {
1278f22ef01cSRoman Divacky     Mode = ARM_AM::db;
12797d523365SDimitry Andric   } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1280f22ef01cSRoman Divacky     Mode = ARM_AM::da;
12817d523365SDimitry Andric   } else {
12827d523365SDimitry Andric     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
12837d523365SDimitry Andric     if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
12843ca95b02SDimitry Andric         ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
12853ca95b02SDimitry Andric 
12863ca95b02SDimitry Andric       // We couldn't find an inc/dec to merge. But if the base is dead, we
12873ca95b02SDimitry Andric       // can still change to a writeback form as that will save us 2 bytes
12883ca95b02SDimitry Andric       // of code size. It can create WAW hazards though, so only do it if
12893ca95b02SDimitry Andric       // we're minimizing code size.
12902cab237bSDimitry Andric       if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
12913ca95b02SDimitry Andric         return false;
12923ca95b02SDimitry Andric 
12933ca95b02SDimitry Andric       bool HighRegsUsed = false;
12943ca95b02SDimitry Andric       for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
12953ca95b02SDimitry Andric         if (MI->getOperand(i).getReg() >= ARM::R8) {
12963ca95b02SDimitry Andric           HighRegsUsed = true;
12973ca95b02SDimitry Andric           break;
12983ca95b02SDimitry Andric         }
12993ca95b02SDimitry Andric 
13003ca95b02SDimitry Andric       if (!HighRegsUsed)
13013ca95b02SDimitry Andric         MergeInstr = MBB.end();
13023ca95b02SDimitry Andric       else
1303b6c25e0eSDimitry Andric         return false;
13047d523365SDimitry Andric     }
13053ca95b02SDimitry Andric   }
13063ca95b02SDimitry Andric   if (MergeInstr != MBB.end())
13077d523365SDimitry Andric     MBB.erase(MergeInstr);
1308f22ef01cSRoman Divacky 
13092754fe60SDimitry Andric   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1310875ed548SDimitry Andric   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1311f22ef01cSRoman Divacky     .addReg(Base, getDefRegState(true)) // WB base register
1312e580952dSDimitry Andric     .addReg(Base, getKillRegState(BaseKill))
1313f22ef01cSRoman Divacky     .addImm(Pred).addReg(PredReg);
13142754fe60SDimitry Andric 
1315f22ef01cSRoman Divacky   // Transfer the rest of operands.
13162754fe60SDimitry Andric   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
13177a7e6055SDimitry Andric     MIB.add(MI->getOperand(OpNum));
13182754fe60SDimitry Andric 
1319f22ef01cSRoman Divacky   // Transfer memoperands.
1320*b5893f02SDimitry Andric   MIB.setMemRefs(MI->memoperands());
1321f22ef01cSRoman Divacky 
1322f22ef01cSRoman Divacky   MBB.erase(MBBI);
1323f22ef01cSRoman Divacky   return true;
1324f22ef01cSRoman Divacky }
1325f22ef01cSRoman Divacky 
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)13262754fe60SDimitry Andric static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
13272754fe60SDimitry Andric                                              ARM_AM::AddrOpc Mode) {
1328f22ef01cSRoman Divacky   switch (Opc) {
13292754fe60SDimitry Andric   case ARM::LDRi12:
13306122f3e6SDimitry Andric     return ARM::LDR_PRE_IMM;
13312754fe60SDimitry Andric   case ARM::STRi12:
13326122f3e6SDimitry Andric     return ARM::STR_PRE_IMM;
13332754fe60SDimitry Andric   case ARM::VLDRS:
13342754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
13352754fe60SDimitry Andric   case ARM::VLDRD:
13362754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
13372754fe60SDimitry Andric   case ARM::VSTRS:
13382754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
13392754fe60SDimitry Andric   case ARM::VSTRD:
13402754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1341f22ef01cSRoman Divacky   case ARM::t2LDRi8:
1342f22ef01cSRoman Divacky   case ARM::t2LDRi12:
1343f22ef01cSRoman Divacky     return ARM::t2LDR_PRE;
1344f22ef01cSRoman Divacky   case ARM::t2STRi8:
1345f22ef01cSRoman Divacky   case ARM::t2STRi12:
1346f22ef01cSRoman Divacky     return ARM::t2STR_PRE;
1347f22ef01cSRoman Divacky   default: llvm_unreachable("Unhandled opcode!");
1348f22ef01cSRoman Divacky   }
1349f22ef01cSRoman Divacky }
1350f22ef01cSRoman Divacky 
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)13512754fe60SDimitry Andric static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
13522754fe60SDimitry Andric                                               ARM_AM::AddrOpc Mode) {
1353f22ef01cSRoman Divacky   switch (Opc) {
13542754fe60SDimitry Andric   case ARM::LDRi12:
13556122f3e6SDimitry Andric     return ARM::LDR_POST_IMM;
13562754fe60SDimitry Andric   case ARM::STRi12:
13576122f3e6SDimitry Andric     return ARM::STR_POST_IMM;
13582754fe60SDimitry Andric   case ARM::VLDRS:
13592754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
13602754fe60SDimitry Andric   case ARM::VLDRD:
13612754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
13622754fe60SDimitry Andric   case ARM::VSTRS:
13632754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
13642754fe60SDimitry Andric   case ARM::VSTRD:
13652754fe60SDimitry Andric     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1366f22ef01cSRoman Divacky   case ARM::t2LDRi8:
1367f22ef01cSRoman Divacky   case ARM::t2LDRi12:
1368f22ef01cSRoman Divacky     return ARM::t2LDR_POST;
1369f22ef01cSRoman Divacky   case ARM::t2STRi8:
1370f22ef01cSRoman Divacky   case ARM::t2STRi12:
1371f22ef01cSRoman Divacky     return ARM::t2STR_POST;
1372f22ef01cSRoman Divacky   default: llvm_unreachable("Unhandled opcode!");
1373f22ef01cSRoman Divacky   }
1374f22ef01cSRoman Divacky }
1375f22ef01cSRoman Divacky 
137697bc6c73SDimitry Andric /// Fold proceeding/trailing inc/dec of base register into the
137797bc6c73SDimitry Andric /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineInstr * MI)1378875ed548SDimitry Andric bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
137991bc56edSDimitry Andric   // Thumb1 doesn't have updating LDR/STR.
138091bc56edSDimitry Andric   // FIXME: Use LDM/STM with single register instead.
138191bc56edSDimitry Andric   if (isThumb1) return false;
138291bc56edSDimitry Andric 
1383875ed548SDimitry Andric   unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1384875ed548SDimitry Andric   bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1385ff0cc061SDimitry Andric   unsigned Opcode = MI->getOpcode();
1386875ed548SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
1387f22ef01cSRoman Divacky   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1388f22ef01cSRoman Divacky                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
13892754fe60SDimitry Andric   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
13902754fe60SDimitry Andric   if (isi32Load(Opcode) || isi32Store(Opcode))
13912754fe60SDimitry Andric     if (MI->getOperand(2).getImm() != 0)
1392f22ef01cSRoman Divacky       return false;
1393f22ef01cSRoman Divacky   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1394f22ef01cSRoman Divacky     return false;
1395f22ef01cSRoman Divacky 
1396f22ef01cSRoman Divacky   // Can't do the merge if the destination register is the same as the would-be
1397f22ef01cSRoman Divacky   // writeback register.
1398139f7f9bSDimitry Andric   if (MI->getOperand(0).getReg() == Base)
1399f22ef01cSRoman Divacky     return false;
1400f22ef01cSRoman Divacky 
1401f22ef01cSRoman Divacky   unsigned PredReg = 0;
14023ca95b02SDimitry Andric   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
14037d523365SDimitry Andric   int Bytes = getLSMultipleTransferSize(MI);
1404875ed548SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
1405875ed548SDimitry Andric   MachineBasicBlock::iterator MBBI(MI);
14067d523365SDimitry Andric   int Offset;
14077d523365SDimitry Andric   MachineBasicBlock::iterator MergeInstr
14087d523365SDimitry Andric     = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
14097d523365SDimitry Andric   unsigned NewOpc;
14107d523365SDimitry Andric   if (!isAM5 && Offset == Bytes) {
14117d523365SDimitry Andric     NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
14127d523365SDimitry Andric   } else if (Offset == -Bytes) {
14137d523365SDimitry Andric     NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
14147d523365SDimitry Andric   } else {
14157d523365SDimitry Andric     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
14167d523365SDimitry Andric     if (Offset == Bytes) {
14177d523365SDimitry Andric       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
14187d523365SDimitry Andric     } else if (!isAM5 && Offset == -Bytes) {
14197d523365SDimitry Andric       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
14207d523365SDimitry Andric     } else
1421b6c25e0eSDimitry Andric       return false;
14227d523365SDimitry Andric   }
14237d523365SDimitry Andric   MBB.erase(MergeInstr);
1424b6c25e0eSDimitry Andric 
14257d523365SDimitry Andric   ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
14267d523365SDimitry Andric 
14277d523365SDimitry Andric   bool isLd = isLoadSingle(Opcode);
1428f22ef01cSRoman Divacky   if (isAM5) {
142991bc56edSDimitry Andric     // VLDM[SD]_UPD, VSTM[SD]_UPD
1430e580952dSDimitry Andric     // (There are no base-updating versions of VLDR/VSTR instructions, but the
1431e580952dSDimitry Andric     // updating load/store-multiple instructions can be used with only one
1432e580952dSDimitry Andric     // register.)
1433f22ef01cSRoman Divacky     MachineOperand &MO = MI->getOperand(0);
1434875ed548SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1435f22ef01cSRoman Divacky       .addReg(Base, getDefRegState(true)) // WB base register
1436f22ef01cSRoman Divacky       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1437f22ef01cSRoman Divacky       .addImm(Pred).addReg(PredReg)
1438f22ef01cSRoman Divacky       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1439f22ef01cSRoman Divacky                             getKillRegState(MO.isKill())));
1440f22ef01cSRoman Divacky   } else if (isLd) {
14416122f3e6SDimitry Andric     if (isAM2) {
14426122f3e6SDimitry Andric       // LDR_PRE, LDR_POST
14436122f3e6SDimitry Andric       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1444875ed548SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1445f22ef01cSRoman Divacky           .addReg(Base, RegState::Define)
1446f22ef01cSRoman Divacky           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1447f22ef01cSRoman Divacky       } else {
14487d523365SDimitry Andric         int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1449875ed548SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
14506122f3e6SDimitry Andric             .addReg(Base, RegState::Define)
14517a7e6055SDimitry Andric             .addReg(Base)
14527a7e6055SDimitry Andric             .addReg(0)
14537a7e6055SDimitry Andric             .addImm(Imm)
14547a7e6055SDimitry Andric             .add(predOps(Pred, PredReg));
14556122f3e6SDimitry Andric       }
14566122f3e6SDimitry Andric     } else {
14576122f3e6SDimitry Andric       // t2LDR_PRE, t2LDR_POST
1458875ed548SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
14596122f3e6SDimitry Andric           .addReg(Base, RegState::Define)
14607a7e6055SDimitry Andric           .addReg(Base)
14617a7e6055SDimitry Andric           .addImm(Offset)
14627a7e6055SDimitry Andric           .add(predOps(Pred, PredReg));
14636122f3e6SDimitry Andric     }
14646122f3e6SDimitry Andric   } else {
1465f22ef01cSRoman Divacky     MachineOperand &MO = MI->getOperand(0);
14666122f3e6SDimitry Andric     // FIXME: post-indexed stores use am2offset_imm, which still encodes
14676122f3e6SDimitry Andric     // the vestigal zero-reg offset register. When that's fixed, this clause
14686122f3e6SDimitry Andric     // can be removed entirely.
14696122f3e6SDimitry Andric     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
14707d523365SDimitry Andric       int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1471f22ef01cSRoman Divacky       // STR_PRE, STR_POST
1472875ed548SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1473f22ef01cSRoman Divacky           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
14747a7e6055SDimitry Andric           .addReg(Base)
14757a7e6055SDimitry Andric           .addReg(0)
14767a7e6055SDimitry Andric           .addImm(Imm)
14777a7e6055SDimitry Andric           .add(predOps(Pred, PredReg));
14786122f3e6SDimitry Andric     } else {
1479f22ef01cSRoman Divacky       // t2STR_PRE, t2STR_POST
1480875ed548SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1481f22ef01cSRoman Divacky           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
14827a7e6055SDimitry Andric           .addReg(Base)
14837a7e6055SDimitry Andric           .addImm(Offset)
14847a7e6055SDimitry Andric           .add(predOps(Pred, PredReg));
1485f22ef01cSRoman Divacky     }
14866122f3e6SDimitry Andric   }
1487f22ef01cSRoman Divacky   MBB.erase(MBBI);
1488f22ef01cSRoman Divacky 
1489f22ef01cSRoman Divacky   return true;
1490f22ef01cSRoman Divacky }
1491f22ef01cSRoman Divacky 
MergeBaseUpdateLSDouble(MachineInstr & MI) const14927d523365SDimitry Andric bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
14937d523365SDimitry Andric   unsigned Opcode = MI.getOpcode();
14947d523365SDimitry Andric   assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
14957d523365SDimitry Andric          "Must have t2STRDi8 or t2LDRDi8");
14967d523365SDimitry Andric   if (MI.getOperand(3).getImm() != 0)
14977d523365SDimitry Andric     return false;
14987d523365SDimitry Andric 
14997d523365SDimitry Andric   // Behaviour for writeback is undefined if base register is the same as one
15007d523365SDimitry Andric   // of the others.
15017d523365SDimitry Andric   const MachineOperand &BaseOp = MI.getOperand(2);
15027d523365SDimitry Andric   unsigned Base = BaseOp.getReg();
15037d523365SDimitry Andric   const MachineOperand &Reg0Op = MI.getOperand(0);
15047d523365SDimitry Andric   const MachineOperand &Reg1Op = MI.getOperand(1);
15057d523365SDimitry Andric   if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
15067d523365SDimitry Andric     return false;
15077d523365SDimitry Andric 
15087d523365SDimitry Andric   unsigned PredReg;
15093ca95b02SDimitry Andric   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
15107d523365SDimitry Andric   MachineBasicBlock::iterator MBBI(MI);
15117d523365SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
15127d523365SDimitry Andric   int Offset;
15137d523365SDimitry Andric   MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
15147d523365SDimitry Andric                                                             PredReg, Offset);
15157d523365SDimitry Andric   unsigned NewOpc;
15167d523365SDimitry Andric   if (Offset == 8 || Offset == -8) {
15177d523365SDimitry Andric     NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
15187d523365SDimitry Andric   } else {
15197d523365SDimitry Andric     MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
15207d523365SDimitry Andric     if (Offset == 8 || Offset == -8) {
15217d523365SDimitry Andric       NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
15227d523365SDimitry Andric     } else
15237d523365SDimitry Andric       return false;
15247d523365SDimitry Andric   }
15257d523365SDimitry Andric   MBB.erase(MergeInstr);
15267d523365SDimitry Andric 
15277d523365SDimitry Andric   DebugLoc DL = MI.getDebugLoc();
15287d523365SDimitry Andric   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
15297d523365SDimitry Andric   if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
15307a7e6055SDimitry Andric     MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
15317d523365SDimitry Andric   } else {
15327d523365SDimitry Andric     assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
15337a7e6055SDimitry Andric     MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
15347d523365SDimitry Andric   }
15357d523365SDimitry Andric   MIB.addReg(BaseOp.getReg(), RegState::Kill)
15367d523365SDimitry Andric      .addImm(Offset).addImm(Pred).addReg(PredReg);
15377d523365SDimitry Andric   assert(TII->get(Opcode).getNumOperands() == 6 &&
15387d523365SDimitry Andric          TII->get(NewOpc).getNumOperands() == 7 &&
15397d523365SDimitry Andric          "Unexpected number of operands in Opcode specification.");
15407d523365SDimitry Andric 
15417d523365SDimitry Andric   // Transfer implicit operands.
15427d523365SDimitry Andric   for (const MachineOperand &MO : MI.implicit_operands())
15437a7e6055SDimitry Andric     MIB.add(MO);
1544*b5893f02SDimitry Andric   MIB.setMemRefs(MI.memoperands());
15457d523365SDimitry Andric 
15467d523365SDimitry Andric   MBB.erase(MBBI);
15477d523365SDimitry Andric   return true;
15487d523365SDimitry Andric }
15497d523365SDimitry Andric 
155097bc6c73SDimitry Andric /// Returns true if instruction is a memory operation that this pass is capable
155197bc6c73SDimitry Andric /// of operating on.
isMemoryOp(const MachineInstr & MI)15527d523365SDimitry Andric static bool isMemoryOp(const MachineInstr &MI) {
15537d523365SDimitry Andric   unsigned Opcode = MI.getOpcode();
1554f22ef01cSRoman Divacky   switch (Opcode) {
1555f22ef01cSRoman Divacky   case ARM::VLDRS:
1556f22ef01cSRoman Divacky   case ARM::VSTRS:
1557f22ef01cSRoman Divacky   case ARM::VLDRD:
1558f22ef01cSRoman Divacky   case ARM::VSTRD:
15592754fe60SDimitry Andric   case ARM::LDRi12:
15602754fe60SDimitry Andric   case ARM::STRi12:
156191bc56edSDimitry Andric   case ARM::tLDRi:
156291bc56edSDimitry Andric   case ARM::tSTRi:
1563ff0cc061SDimitry Andric   case ARM::tLDRspi:
1564ff0cc061SDimitry Andric   case ARM::tSTRspi:
1565f22ef01cSRoman Divacky   case ARM::t2LDRi8:
1566f22ef01cSRoman Divacky   case ARM::t2LDRi12:
1567f22ef01cSRoman Divacky   case ARM::t2STRi8:
1568f22ef01cSRoman Divacky   case ARM::t2STRi12:
15697d523365SDimitry Andric     break;
15707d523365SDimitry Andric   default:
1571f22ef01cSRoman Divacky     return false;
1572f22ef01cSRoman Divacky   }
15737d523365SDimitry Andric   if (!MI.getOperand(1).isReg())
15747d523365SDimitry Andric     return false;
15757d523365SDimitry Andric 
15767d523365SDimitry Andric   // When no memory operands are present, conservatively assume unaligned,
15777d523365SDimitry Andric   // volatile, unfoldable.
15787d523365SDimitry Andric   if (!MI.hasOneMemOperand())
15797d523365SDimitry Andric     return false;
15807d523365SDimitry Andric 
15817d523365SDimitry Andric   const MachineMemOperand &MMO = **MI.memoperands_begin();
15827d523365SDimitry Andric 
15837d523365SDimitry Andric   // Don't touch volatile memory accesses - we may be changing their order.
15847d523365SDimitry Andric   if (MMO.isVolatile())
15857d523365SDimitry Andric     return false;
15867d523365SDimitry Andric 
15877d523365SDimitry Andric   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
15887d523365SDimitry Andric   // not.
15897d523365SDimitry Andric   if (MMO.getAlignment() < 4)
15907d523365SDimitry Andric     return false;
15917d523365SDimitry Andric 
15927d523365SDimitry Andric   // str <undef> could probably be eliminated entirely, but for now we just want
15937d523365SDimitry Andric   // to avoid making a mess of it.
15947d523365SDimitry Andric   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
15957d523365SDimitry Andric   if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
15967d523365SDimitry Andric     return false;
15977d523365SDimitry Andric 
15987d523365SDimitry Andric   // Likewise don't mess with references to undefined addresses.
15997d523365SDimitry Andric   if (MI.getOperand(1).isUndef())
16007d523365SDimitry Andric     return false;
16017d523365SDimitry Andric 
16027d523365SDimitry Andric   return true;
16037d523365SDimitry Andric }
1604f22ef01cSRoman Divacky 
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII)1605f22ef01cSRoman Divacky static void InsertLDR_STR(MachineBasicBlock &MBB,
16063ca95b02SDimitry Andric                           MachineBasicBlock::iterator &MBBI, int Offset,
16072cab237bSDimitry Andric                           bool isDef, unsigned NewOpc, unsigned Reg,
16082cab237bSDimitry Andric                           bool RegDeadKill, bool RegUndef, unsigned BaseReg,
16092cab237bSDimitry Andric                           bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
16102cab237bSDimitry Andric                           unsigned PredReg, const TargetInstrInfo *TII) {
1611f22ef01cSRoman Divacky   if (isDef) {
1612f22ef01cSRoman Divacky     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1613f22ef01cSRoman Divacky                                       TII->get(NewOpc))
1614f22ef01cSRoman Divacky       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1615f22ef01cSRoman Divacky       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1616f22ef01cSRoman Divacky     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1617f22ef01cSRoman Divacky   } else {
1618f22ef01cSRoman Divacky     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1619f22ef01cSRoman Divacky                                       TII->get(NewOpc))
1620f22ef01cSRoman Divacky       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1621f22ef01cSRoman Divacky       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1622f22ef01cSRoman Divacky     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1623f22ef01cSRoman Divacky   }
1624f22ef01cSRoman Divacky }
1625f22ef01cSRoman Divacky 
FixInvalidRegPairOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)1626f22ef01cSRoman Divacky bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1627f22ef01cSRoman Divacky                                           MachineBasicBlock::iterator &MBBI) {
1628f22ef01cSRoman Divacky   MachineInstr *MI = &*MBBI;
1629f22ef01cSRoman Divacky   unsigned Opcode = MI->getOpcode();
16302cab237bSDimitry Andric   // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
16312cab237bSDimitry Andric   // if we see this opcode.
16323dac3a9bSDimitry Andric   if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
16333dac3a9bSDimitry Andric     return false;
16343dac3a9bSDimitry Andric 
1635dff0c46cSDimitry Andric   const MachineOperand &BaseOp = MI->getOperand(2);
1636dff0c46cSDimitry Andric   unsigned BaseReg = BaseOp.getReg();
1637f22ef01cSRoman Divacky   unsigned EvenReg = MI->getOperand(0).getReg();
1638f22ef01cSRoman Divacky   unsigned OddReg  = MI->getOperand(1).getReg();
1639f22ef01cSRoman Divacky   unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1640f22ef01cSRoman Divacky   unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
16413dac3a9bSDimitry Andric 
1642dff0c46cSDimitry Andric   // ARM errata 602117: LDRD with base in list may result in incorrect base
1643dff0c46cSDimitry Andric   // register when interrupted or faulted.
16443dac3a9bSDimitry Andric   bool Errata602117 = EvenReg == BaseReg &&
16453dac3a9bSDimitry Andric     (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
16463dac3a9bSDimitry Andric   // ARM LDRD/STRD needs consecutive registers.
16473dac3a9bSDimitry Andric   bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
16483dac3a9bSDimitry Andric     (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
16493dac3a9bSDimitry Andric 
16503dac3a9bSDimitry Andric   if (!Errata602117 && !NonConsecutiveRegs)
1651f22ef01cSRoman Divacky     return false;
1652f22ef01cSRoman Divacky 
1653f22ef01cSRoman Divacky   bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1654f22ef01cSRoman Divacky   bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1655f22ef01cSRoman Divacky   bool EvenDeadKill = isLd ?
1656f22ef01cSRoman Divacky     MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1657f22ef01cSRoman Divacky   bool EvenUndef = MI->getOperand(0).isUndef();
1658f22ef01cSRoman Divacky   bool OddDeadKill  = isLd ?
1659f22ef01cSRoman Divacky     MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1660f22ef01cSRoman Divacky   bool OddUndef = MI->getOperand(1).isUndef();
1661f22ef01cSRoman Divacky   bool BaseKill = BaseOp.isKill();
1662f22ef01cSRoman Divacky   bool BaseUndef = BaseOp.isUndef();
16632cab237bSDimitry Andric   assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
16642cab237bSDimitry Andric          "register offset not handled below");
16653ca95b02SDimitry Andric   int OffImm = getMemoryOpOffset(*MI);
1666f22ef01cSRoman Divacky   unsigned PredReg = 0;
16673ca95b02SDimitry Andric   ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1668f22ef01cSRoman Divacky 
16692754fe60SDimitry Andric   if (OddRegNum > EvenRegNum && OffImm == 0) {
1670f22ef01cSRoman Divacky     // Ascending register numbers and no offset. It's safe to change it to a
1671f22ef01cSRoman Divacky     // ldm or stm.
1672f22ef01cSRoman Divacky     unsigned NewOpc = (isLd)
16732754fe60SDimitry Andric       ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
16742754fe60SDimitry Andric       : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1675f22ef01cSRoman Divacky     if (isLd) {
1676f22ef01cSRoman Divacky       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1677f22ef01cSRoman Divacky         .addReg(BaseReg, getKillRegState(BaseKill))
1678f22ef01cSRoman Divacky         .addImm(Pred).addReg(PredReg)
1679f22ef01cSRoman Divacky         .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1680f22ef01cSRoman Divacky         .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1681f22ef01cSRoman Divacky       ++NumLDRD2LDM;
1682f22ef01cSRoman Divacky     } else {
1683f22ef01cSRoman Divacky       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1684f22ef01cSRoman Divacky         .addReg(BaseReg, getKillRegState(BaseKill))
1685f22ef01cSRoman Divacky         .addImm(Pred).addReg(PredReg)
1686f22ef01cSRoman Divacky         .addReg(EvenReg,
1687f22ef01cSRoman Divacky                 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1688f22ef01cSRoman Divacky         .addReg(OddReg,
1689f22ef01cSRoman Divacky                 getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1690f22ef01cSRoman Divacky       ++NumSTRD2STM;
1691f22ef01cSRoman Divacky     }
1692f22ef01cSRoman Divacky   } else {
1693f22ef01cSRoman Divacky     // Split into two instructions.
1694f22ef01cSRoman Divacky     unsigned NewOpc = (isLd)
16952754fe60SDimitry Andric       ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
16962754fe60SDimitry Andric       : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1697dff0c46cSDimitry Andric     // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1698dff0c46cSDimitry Andric     // so adjust and use t2LDRi12 here for that.
1699dff0c46cSDimitry Andric     unsigned NewOpc2 = (isLd)
1700dff0c46cSDimitry Andric       ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1701dff0c46cSDimitry Andric       : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
17022cab237bSDimitry Andric     // If this is a load, make sure the first load does not clobber the base
17032cab237bSDimitry Andric     // register before the second load reads it.
17042cab237bSDimitry Andric     if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
17052754fe60SDimitry Andric       assert(!TRI->regsOverlap(OddReg, BaseReg));
17062cab237bSDimitry Andric       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
17072cab237bSDimitry Andric                     false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
17082cab237bSDimitry Andric       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
17092cab237bSDimitry Andric                     false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
1710f22ef01cSRoman Divacky     } else {
1711f22ef01cSRoman Divacky       if (OddReg == EvenReg && EvenDeadKill) {
1712ffd1746dSEd Schouten         // If the two source operands are the same, the kill marker is
1713ffd1746dSEd Schouten         // probably on the first one. e.g.
17142cab237bSDimitry Andric         // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1715f22ef01cSRoman Divacky         EvenDeadKill = false;
1716f22ef01cSRoman Divacky         OddDeadKill = true;
1717f22ef01cSRoman Divacky       }
1718dff0c46cSDimitry Andric       // Never kill the base register in the first instruction.
1719dff0c46cSDimitry Andric       if (EvenReg == BaseReg)
1720dff0c46cSDimitry Andric         EvenDeadKill = false;
17212cab237bSDimitry Andric       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
17222cab237bSDimitry Andric                     EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
17232cab237bSDimitry Andric       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
17242cab237bSDimitry Andric                     OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
1725f22ef01cSRoman Divacky     }
1726f22ef01cSRoman Divacky     if (isLd)
1727f22ef01cSRoman Divacky       ++NumLDRD2LDR;
1728f22ef01cSRoman Divacky     else
1729f22ef01cSRoman Divacky       ++NumSTRD2STR;
1730f22ef01cSRoman Divacky   }
1731f22ef01cSRoman Divacky 
1732875ed548SDimitry Andric   MBBI = MBB.erase(MBBI);
1733ffd1746dSEd Schouten   return true;
1734f22ef01cSRoman Divacky }
1735f22ef01cSRoman Divacky 
173697bc6c73SDimitry Andric /// An optimization pass to turn multiple LDR / STR ops of the same base and
173797bc6c73SDimitry Andric /// incrementing offset into LDM / STM ops.
LoadStoreMultipleOpti(MachineBasicBlock & MBB)1738f22ef01cSRoman Divacky bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1739f22ef01cSRoman Divacky   MemOpQueue MemOps;
1740f22ef01cSRoman Divacky   unsigned CurrBase = 0;
1741ff0cc061SDimitry Andric   unsigned CurrOpc = ~0u;
1742f22ef01cSRoman Divacky   ARMCC::CondCodes CurrPred = ARMCC::AL;
1743f22ef01cSRoman Divacky   unsigned Position = 0;
1744875ed548SDimitry Andric   assert(Candidates.size() == 0);
17457d523365SDimitry Andric   assert(MergeBaseCandidates.size() == 0);
1746875ed548SDimitry Andric   LiveRegsValid = false;
1747f22ef01cSRoman Divacky 
1748875ed548SDimitry Andric   for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1749875ed548SDimitry Andric        I = MBBI) {
1750875ed548SDimitry Andric     // The instruction in front of the iterator is the one we look at.
1751875ed548SDimitry Andric     MBBI = std::prev(I);
1752f22ef01cSRoman Divacky     if (FixInvalidRegPairOp(MBB, MBBI))
1753f22ef01cSRoman Divacky       continue;
1754875ed548SDimitry Andric     ++Position;
1755f22ef01cSRoman Divacky 
17567d523365SDimitry Andric     if (isMemoryOp(*MBBI)) {
1757ff0cc061SDimitry Andric       unsigned Opcode = MBBI->getOpcode();
1758ffd1746dSEd Schouten       const MachineOperand &MO = MBBI->getOperand(0);
1759ffd1746dSEd Schouten       unsigned Reg = MO.getReg();
1760875ed548SDimitry Andric       unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1761f22ef01cSRoman Divacky       unsigned PredReg = 0;
17623ca95b02SDimitry Andric       ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
17633ca95b02SDimitry Andric       int Offset = getMemoryOpOffset(*MBBI);
1764875ed548SDimitry Andric       if (CurrBase == 0) {
1765875ed548SDimitry Andric         // Start of a new chain.
1766875ed548SDimitry Andric         CurrBase = Base;
1767875ed548SDimitry Andric         CurrOpc  = Opcode;
1768875ed548SDimitry Andric         CurrPred = Pred;
17693ca95b02SDimitry Andric         MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1770875ed548SDimitry Andric         continue;
1771875ed548SDimitry Andric       }
1772875ed548SDimitry Andric       // Note: No need to match PredReg in the next if.
1773875ed548SDimitry Andric       if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1774284c1978SDimitry Andric         // Watch out for:
1775284c1978SDimitry Andric         //   r4 := ldr [r0, #8]
1776284c1978SDimitry Andric         //   r4 := ldr [r0, #4]
1777875ed548SDimitry Andric         // or
1778875ed548SDimitry Andric         //   r0 := ldr [r0]
1779875ed548SDimitry Andric         // If a load overrides the base register or a register loaded by
1780875ed548SDimitry Andric         // another load in our chain, we cannot take this instruction.
1781284c1978SDimitry Andric         bool Overlap = false;
1782875ed548SDimitry Andric         if (isLoadSingle(Opcode)) {
1783875ed548SDimitry Andric           Overlap = (Base == Reg);
1784875ed548SDimitry Andric           if (!Overlap) {
1785875ed548SDimitry Andric             for (const MemOpQueueEntry &E : MemOps) {
1786875ed548SDimitry Andric               if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1787284c1978SDimitry Andric                 Overlap = true;
1788284c1978SDimitry Andric                 break;
1789284c1978SDimitry Andric               }
1790284c1978SDimitry Andric             }
1791875ed548SDimitry Andric           }
1792f22ef01cSRoman Divacky         }
1793f22ef01cSRoman Divacky 
1794875ed548SDimitry Andric         if (!Overlap) {
1795875ed548SDimitry Andric           // Check offset and sort memory operation into the current chain.
1796f22ef01cSRoman Divacky           if (Offset > MemOps.back().Offset) {
17973ca95b02SDimitry Andric             MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1798875ed548SDimitry Andric             continue;
1799f22ef01cSRoman Divacky           } else {
1800875ed548SDimitry Andric             MemOpQueue::iterator MI, ME;
1801875ed548SDimitry Andric             for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1802875ed548SDimitry Andric               if (Offset < MI->Offset) {
1803875ed548SDimitry Andric                 // Found a place to insert.
1804f22ef01cSRoman Divacky                 break;
1805875ed548SDimitry Andric               }
1806875ed548SDimitry Andric               if (Offset == MI->Offset) {
1807875ed548SDimitry Andric                 // Collision, abort.
1808875ed548SDimitry Andric                 MI = ME;
1809f22ef01cSRoman Divacky                 break;
1810f22ef01cSRoman Divacky               }
1811f22ef01cSRoman Divacky             }
1812875ed548SDimitry Andric             if (MI != MemOps.end()) {
18133ca95b02SDimitry Andric               MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1814875ed548SDimitry Andric               continue;
1815f22ef01cSRoman Divacky             }
1816f22ef01cSRoman Divacky           }
1817f22ef01cSRoman Divacky         }
1818f22ef01cSRoman Divacky       }
1819f22ef01cSRoman Divacky 
1820875ed548SDimitry Andric       // Don't advance the iterator; The op will start a new chain next.
1821875ed548SDimitry Andric       MBBI = I;
1822875ed548SDimitry Andric       --Position;
1823875ed548SDimitry Andric       // Fallthrough to look into existing chain.
18244ba319b5SDimitry Andric     } else if (MBBI->isDebugInstr()) {
1825875ed548SDimitry Andric       continue;
18267d523365SDimitry Andric     } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
18277d523365SDimitry Andric                MBBI->getOpcode() == ARM::t2STRDi8) {
18287d523365SDimitry Andric       // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
18297d523365SDimitry Andric       // remember them because we may still be able to merge add/sub into them.
18303ca95b02SDimitry Andric       MergeBaseCandidates.push_back(&*MBBI);
18317d523365SDimitry Andric     }
18327d523365SDimitry Andric 
1833875ed548SDimitry Andric     // If we are here then the chain is broken; Extract candidates for a merge.
1834875ed548SDimitry Andric     if (MemOps.size() > 0) {
1835875ed548SDimitry Andric       FormCandidates(MemOps);
1836875ed548SDimitry Andric       // Reset for the next chain.
1837f22ef01cSRoman Divacky       CurrBase = 0;
1838ff0cc061SDimitry Andric       CurrOpc = ~0u;
1839f22ef01cSRoman Divacky       CurrPred = ARMCC::AL;
1840f22ef01cSRoman Divacky       MemOps.clear();
1841f22ef01cSRoman Divacky     }
1842875ed548SDimitry Andric   }
1843875ed548SDimitry Andric   if (MemOps.size() > 0)
1844875ed548SDimitry Andric     FormCandidates(MemOps);
1845f22ef01cSRoman Divacky 
1846875ed548SDimitry Andric   // Sort candidates so they get processed from end to begin of the basic
1847875ed548SDimitry Andric   // block later; This is necessary for liveness calculation.
1848875ed548SDimitry Andric   auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1849875ed548SDimitry Andric     return M0->InsertPos < M1->InsertPos;
1850875ed548SDimitry Andric   };
1851*b5893f02SDimitry Andric   llvm::sort(Candidates, LessThan);
1852875ed548SDimitry Andric 
1853875ed548SDimitry Andric   // Go through list of candidates and merge.
1854875ed548SDimitry Andric   bool Changed = false;
1855875ed548SDimitry Andric   for (const MergeCandidate *Candidate : Candidates) {
1856875ed548SDimitry Andric     if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1857875ed548SDimitry Andric       MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1858875ed548SDimitry Andric       // Merge preceding/trailing base inc/dec into the merged op.
1859875ed548SDimitry Andric       if (Merged) {
1860875ed548SDimitry Andric         Changed = true;
1861875ed548SDimitry Andric         unsigned Opcode = Merged->getOpcode();
18627d523365SDimitry Andric         if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
18637d523365SDimitry Andric           MergeBaseUpdateLSDouble(*Merged);
18647d523365SDimitry Andric         else
1865875ed548SDimitry Andric           MergeBaseUpdateLSMultiple(Merged);
1866875ed548SDimitry Andric       } else {
1867875ed548SDimitry Andric         for (MachineInstr *MI : Candidate->Instrs) {
1868875ed548SDimitry Andric           if (MergeBaseUpdateLoadStore(MI))
1869875ed548SDimitry Andric             Changed = true;
1870f22ef01cSRoman Divacky         }
1871f22ef01cSRoman Divacky       }
1872875ed548SDimitry Andric     } else {
1873875ed548SDimitry Andric       assert(Candidate->Instrs.size() == 1);
1874875ed548SDimitry Andric       if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1875875ed548SDimitry Andric         Changed = true;
1876f22ef01cSRoman Divacky     }
1877875ed548SDimitry Andric   }
1878875ed548SDimitry Andric   Candidates.clear();
18797d523365SDimitry Andric   // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
18807d523365SDimitry Andric   for (MachineInstr *MI : MergeBaseCandidates)
18817d523365SDimitry Andric     MergeBaseUpdateLSDouble(*MI);
18827d523365SDimitry Andric   MergeBaseCandidates.clear();
1883875ed548SDimitry Andric 
1884875ed548SDimitry Andric   return Changed;
1885f22ef01cSRoman Divacky }
1886f22ef01cSRoman Divacky 
188797bc6c73SDimitry Andric /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
188897bc6c73SDimitry Andric /// into the preceding stack restore so it directly restore the value of LR
188997bc6c73SDimitry Andric /// into pc.
1890f22ef01cSRoman Divacky ///   ldmfd sp!, {..., lr}
1891f22ef01cSRoman Divacky ///   bx lr
1892f22ef01cSRoman Divacky /// or
1893f22ef01cSRoman Divacky ///   ldmfd sp!, {..., lr}
1894f22ef01cSRoman Divacky ///   mov pc, lr
1895f22ef01cSRoman Divacky /// =>
1896f22ef01cSRoman Divacky ///   ldmfd sp!, {..., pc}
MergeReturnIntoLDM(MachineBasicBlock & MBB)1897f22ef01cSRoman Divacky bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
189891bc56edSDimitry Andric   // Thumb1 LDM doesn't allow high registers.
189991bc56edSDimitry Andric   if (isThumb1) return false;
1900f22ef01cSRoman Divacky   if (MBB.empty()) return false;
1901f22ef01cSRoman Divacky 
19022754fe60SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1903d88c1a5aSDimitry Andric   if (MBBI != MBB.begin() && MBBI != MBB.end() &&
1904f22ef01cSRoman Divacky       (MBBI->getOpcode() == ARM::BX_RET ||
1905f22ef01cSRoman Divacky        MBBI->getOpcode() == ARM::tBX_RET ||
1906f22ef01cSRoman Divacky        MBBI->getOpcode() == ARM::MOVPCLR)) {
19077d523365SDimitry Andric     MachineBasicBlock::iterator PrevI = std::prev(MBBI);
19084ba319b5SDimitry Andric     // Ignore any debug instructions.
19094ba319b5SDimitry Andric     while (PrevI->isDebugInstr() && PrevI != MBB.begin())
19107d523365SDimitry Andric       --PrevI;
19113ca95b02SDimitry Andric     MachineInstr &PrevMI = *PrevI;
19123ca95b02SDimitry Andric     unsigned Opcode = PrevMI.getOpcode();
19132754fe60SDimitry Andric     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
19142754fe60SDimitry Andric         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
19152754fe60SDimitry Andric         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
19163ca95b02SDimitry Andric       MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
1917f22ef01cSRoman Divacky       if (MO.getReg() != ARM::LR)
1918f22ef01cSRoman Divacky         return false;
19192754fe60SDimitry Andric       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
19202754fe60SDimitry Andric       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
19212754fe60SDimitry Andric               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
19223ca95b02SDimitry Andric       PrevMI.setDesc(TII->get(NewOpc));
1923f22ef01cSRoman Divacky       MO.setReg(ARM::PC);
19243ca95b02SDimitry Andric       PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
1925f22ef01cSRoman Divacky       MBB.erase(MBBI);
19262cab237bSDimitry Andric       // We now restore LR into PC so it is not live-out of the return block
19272cab237bSDimitry Andric       // anymore: Clear the CSI Restored bit.
19282cab237bSDimitry Andric       MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
19292cab237bSDimitry Andric       // CSI should be fixed after PrologEpilog Insertion
19302cab237bSDimitry Andric       assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
19312cab237bSDimitry Andric       for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
19322cab237bSDimitry Andric         if (Info.getReg() == ARM::LR) {
19332cab237bSDimitry Andric           Info.setRestored(false);
19342cab237bSDimitry Andric           break;
19352cab237bSDimitry Andric         }
19362cab237bSDimitry Andric       }
1937f22ef01cSRoman Divacky       return true;
1938f22ef01cSRoman Divacky     }
1939f22ef01cSRoman Divacky   }
1940f22ef01cSRoman Divacky   return false;
1941f22ef01cSRoman Divacky }
1942f22ef01cSRoman Divacky 
CombineMovBx(MachineBasicBlock & MBB)19437d523365SDimitry Andric bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
19447d523365SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
19457d523365SDimitry Andric   if (MBBI == MBB.begin() || MBBI == MBB.end() ||
19467d523365SDimitry Andric       MBBI->getOpcode() != ARM::tBX_RET)
19477d523365SDimitry Andric     return false;
19487d523365SDimitry Andric 
19497d523365SDimitry Andric   MachineBasicBlock::iterator Prev = MBBI;
19507d523365SDimitry Andric   --Prev;
19517d523365SDimitry Andric   if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
19527d523365SDimitry Andric     return false;
19537d523365SDimitry Andric 
19547d523365SDimitry Andric   for (auto Use : Prev->uses())
19557d523365SDimitry Andric     if (Use.isKill()) {
1956d4419f6fSDimitry Andric       assert(STI->hasV4TOps());
19577a7e6055SDimitry Andric       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
19587a7e6055SDimitry Andric           .addReg(Use.getReg(), RegState::Kill)
19597a7e6055SDimitry Andric           .add(predOps(ARMCC::AL))
19603ca95b02SDimitry Andric           .copyImplicitOps(*MBBI);
19617d523365SDimitry Andric       MBB.erase(MBBI);
19627d523365SDimitry Andric       MBB.erase(Prev);
19637d523365SDimitry Andric       return true;
19647d523365SDimitry Andric     }
19657d523365SDimitry Andric 
19667d523365SDimitry Andric   llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
19677d523365SDimitry Andric }
19687d523365SDimitry Andric 
runOnMachineFunction(MachineFunction & Fn)1969f22ef01cSRoman Divacky bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
19702cab237bSDimitry Andric   if (skipFunction(Fn.getFunction()))
19713ca95b02SDimitry Andric     return false;
19723ca95b02SDimitry Andric 
1973875ed548SDimitry Andric   MF = &Fn;
1974ff0cc061SDimitry Andric   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1975ff0cc061SDimitry Andric   TL = STI->getTargetLowering();
1976f22ef01cSRoman Divacky   AFI = Fn.getInfo<ARMFunctionInfo>();
1977ff0cc061SDimitry Andric   TII = STI->getInstrInfo();
1978ff0cc061SDimitry Andric   TRI = STI->getRegisterInfo();
19797d523365SDimitry Andric 
1980875ed548SDimitry Andric   RegClassInfoValid = false;
1981f22ef01cSRoman Divacky   isThumb2 = AFI->isThumb2Function();
198291bc56edSDimitry Andric   isThumb1 = AFI->isThumbFunction() && !isThumb2;
198391bc56edSDimitry Andric 
1984f22ef01cSRoman Divacky   bool Modified = false;
1985f22ef01cSRoman Divacky   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1986f22ef01cSRoman Divacky        ++MFI) {
1987f22ef01cSRoman Divacky     MachineBasicBlock &MBB = *MFI;
1988f22ef01cSRoman Divacky     Modified |= LoadStoreMultipleOpti(MBB);
1989ff0cc061SDimitry Andric     if (STI->hasV5TOps())
1990f22ef01cSRoman Divacky       Modified |= MergeReturnIntoLDM(MBB);
19917d523365SDimitry Andric     if (isThumb1)
19927d523365SDimitry Andric       Modified |= CombineMovBx(MBB);
1993f22ef01cSRoman Divacky   }
1994f22ef01cSRoman Divacky 
1995875ed548SDimitry Andric   Allocator.DestroyAll();
1996f22ef01cSRoman Divacky   return Modified;
1997f22ef01cSRoman Divacky }
1998f22ef01cSRoman Divacky 
// Human-readable name of the pre-register-allocation pass. It is used both by
// ARMPreAllocLoadStoreOpt::getPassName() and by the INITIALIZE_PASS
// registration, so the two always agree.
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
  "ARM pre- register allocation load / store optimization pass"
20017d523365SDimitry Andric 
2002f22ef01cSRoman Divacky namespace {
20032cab237bSDimitry Andric 
200497bc6c73SDimitry Andric   /// Pre- register allocation pass that move load / stores from consecutive
200597bc6c73SDimitry Andric   /// locations close to make it more likely they will be combined later.
2006f22ef01cSRoman Divacky   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
2007f22ef01cSRoman Divacky     static char ID;
2008f22ef01cSRoman Divacky 
20097a7e6055SDimitry Andric     AliasAnalysis *AA;
20103861d79fSDimitry Andric     const DataLayout *TD;
2011f22ef01cSRoman Divacky     const TargetInstrInfo *TII;
2012f22ef01cSRoman Divacky     const TargetRegisterInfo *TRI;
2013f22ef01cSRoman Divacky     const ARMSubtarget *STI;
2014f22ef01cSRoman Divacky     MachineRegisterInfo *MRI;
2015f22ef01cSRoman Divacky     MachineFunction *MF;
2016f22ef01cSRoman Divacky 
ARMPreAllocLoadStoreOpt__anon9023e32a0311::ARMPreAllocLoadStoreOpt20172cab237bSDimitry Andric     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
20182cab237bSDimitry Andric 
201991bc56edSDimitry Andric     bool runOnMachineFunction(MachineFunction &Fn) override;
2020f22ef01cSRoman Divacky 
getPassName__anon9023e32a0311::ARMPreAllocLoadStoreOpt2021d88c1a5aSDimitry Andric     StringRef getPassName() const override {
20227d523365SDimitry Andric       return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
2023f22ef01cSRoman Divacky     }
2024f22ef01cSRoman Divacky 
getAnalysisUsage__anon9023e32a0311::ARMPreAllocLoadStoreOpt20252cab237bSDimitry Andric     void getAnalysisUsage(AnalysisUsage &AU) const override {
20267a7e6055SDimitry Andric       AU.addRequired<AAResultsWrapperPass>();
20277a7e6055SDimitry Andric       MachineFunctionPass::getAnalysisUsage(AU);
20287a7e6055SDimitry Andric     }
20297a7e6055SDimitry Andric 
2030f22ef01cSRoman Divacky   private:
2031f22ef01cSRoman Divacky     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2032f22ef01cSRoman Divacky                           unsigned &NewOpc, unsigned &EvenReg,
2033f22ef01cSRoman Divacky                           unsigned &OddReg, unsigned &BaseReg,
20342754fe60SDimitry Andric                           int &Offset,
2035f22ef01cSRoman Divacky                           unsigned &PredReg, ARMCC::CondCodes &Pred,
2036f22ef01cSRoman Divacky                           bool &isT2);
2037f22ef01cSRoman Divacky     bool RescheduleOps(MachineBasicBlock *MBB,
2038f785676fSDimitry Andric                        SmallVectorImpl<MachineInstr *> &Ops,
2039f22ef01cSRoman Divacky                        unsigned Base, bool isLd,
2040f22ef01cSRoman Divacky                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
2041f22ef01cSRoman Divacky     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2042f22ef01cSRoman Divacky   };
20432cab237bSDimitry Andric 
20442cab237bSDimitry Andric } // end anonymous namespace
20452cab237bSDimitry Andric 
2046f22ef01cSRoman Divacky char ARMPreAllocLoadStoreOpt::ID = 0;
2047f22ef01cSRoman Divacky 
20483ca95b02SDimitry Andric INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
20497d523365SDimitry Andric                 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
20507d523365SDimitry Andric 
runOnMachineFunction(MachineFunction & Fn)2051f22ef01cSRoman Divacky bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
20522cab237bSDimitry Andric   if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
20533ca95b02SDimitry Andric     return false;
20543ca95b02SDimitry Andric 
20557d523365SDimitry Andric   TD = &Fn.getDataLayout();
205639d628a0SDimitry Andric   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
2057ff0cc061SDimitry Andric   TII = STI->getInstrInfo();
2058ff0cc061SDimitry Andric   TRI = STI->getRegisterInfo();
2059f22ef01cSRoman Divacky   MRI = &Fn.getRegInfo();
2060f22ef01cSRoman Divacky   MF  = &Fn;
20617a7e6055SDimitry Andric   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2062f22ef01cSRoman Divacky 
2063f22ef01cSRoman Divacky   bool Modified = false;
20647d523365SDimitry Andric   for (MachineBasicBlock &MFI : Fn)
20657d523365SDimitry Andric     Modified |= RescheduleLoadStoreInstrs(&MFI);
2066f22ef01cSRoman Divacky 
2067f22ef01cSRoman Divacky   return Modified;
2068f22ef01cSRoman Divacky }
2069f22ef01cSRoman Divacky 
IsSafeAndProfitableToMove(bool isLd,unsigned Base,MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,SmallPtrSetImpl<MachineInstr * > & MemOps,SmallSet<unsigned,4> & MemRegs,const TargetRegisterInfo * TRI,AliasAnalysis * AA)2070f22ef01cSRoman Divacky static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2071f22ef01cSRoman Divacky                                       MachineBasicBlock::iterator I,
2072f22ef01cSRoman Divacky                                       MachineBasicBlock::iterator E,
207339d628a0SDimitry Andric                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
2074f22ef01cSRoman Divacky                                       SmallSet<unsigned, 4> &MemRegs,
20757a7e6055SDimitry Andric                                       const TargetRegisterInfo *TRI,
20767a7e6055SDimitry Andric                                       AliasAnalysis *AA) {
2077f22ef01cSRoman Divacky   // Are there stores / loads / calls between them?
2078f22ef01cSRoman Divacky   SmallSet<unsigned, 4> AddedRegPressure;
2079f22ef01cSRoman Divacky   while (++I != E) {
20804ba319b5SDimitry Andric     if (I->isDebugInstr() || MemOps.count(&*I))
2081f22ef01cSRoman Divacky       continue;
2082dff0c46cSDimitry Andric     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2083f22ef01cSRoman Divacky       return false;
20847a7e6055SDimitry Andric     if (I->mayStore() || (!isLd && I->mayLoad()))
20857a7e6055SDimitry Andric       for (MachineInstr *MemOp : MemOps)
20867a7e6055SDimitry Andric         if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2087f22ef01cSRoman Divacky           return false;
2088f22ef01cSRoman Divacky     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2089f22ef01cSRoman Divacky       MachineOperand &MO = I->getOperand(j);
2090f22ef01cSRoman Divacky       if (!MO.isReg())
2091f22ef01cSRoman Divacky         continue;
2092f22ef01cSRoman Divacky       unsigned Reg = MO.getReg();
2093f22ef01cSRoman Divacky       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2094f22ef01cSRoman Divacky         return false;
2095f22ef01cSRoman Divacky       if (Reg != Base && !MemRegs.count(Reg))
2096f22ef01cSRoman Divacky         AddedRegPressure.insert(Reg);
2097f22ef01cSRoman Divacky     }
2098f22ef01cSRoman Divacky   }
2099f22ef01cSRoman Divacky 
2100f22ef01cSRoman Divacky   // Estimate register pressure increase due to the transformation.
2101f22ef01cSRoman Divacky   if (MemRegs.size() <= 4)
2102f22ef01cSRoman Divacky     // Ok if we are moving small number of instructions.
2103f22ef01cSRoman Divacky     return true;
2104f22ef01cSRoman Divacky   return AddedRegPressure.size() <= MemRegs.size() * 2;
2105f22ef01cSRoman Divacky }
2106f22ef01cSRoman Divacky 
2107f22ef01cSRoman Divacky bool
CanFormLdStDWord(MachineInstr * Op0,MachineInstr * Op1,DebugLoc & dl,unsigned & NewOpc,unsigned & FirstReg,unsigned & SecondReg,unsigned & BaseReg,int & Offset,unsigned & PredReg,ARMCC::CondCodes & Pred,bool & isT2)2108f22ef01cSRoman Divacky ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
210997bc6c73SDimitry Andric                                           DebugLoc &dl, unsigned &NewOpc,
211097bc6c73SDimitry Andric                                           unsigned &FirstReg,
211197bc6c73SDimitry Andric                                           unsigned &SecondReg,
211297bc6c73SDimitry Andric                                           unsigned &BaseReg, int &Offset,
211397bc6c73SDimitry Andric                                           unsigned &PredReg,
2114f22ef01cSRoman Divacky                                           ARMCC::CondCodes &Pred,
2115f22ef01cSRoman Divacky                                           bool &isT2) {
2116f22ef01cSRoman Divacky   // Make sure we're allowed to generate LDRD/STRD.
2117f22ef01cSRoman Divacky   if (!STI->hasV5TEOps())
2118f22ef01cSRoman Divacky     return false;
2119f22ef01cSRoman Divacky 
2120f22ef01cSRoman Divacky   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2121f22ef01cSRoman Divacky   unsigned Scale = 1;
2122f22ef01cSRoman Divacky   unsigned Opcode = Op0->getOpcode();
212391bc56edSDimitry Andric   if (Opcode == ARM::LDRi12) {
2124f22ef01cSRoman Divacky     NewOpc = ARM::LDRD;
212591bc56edSDimitry Andric   } else if (Opcode == ARM::STRi12) {
2126f22ef01cSRoman Divacky     NewOpc = ARM::STRD;
212791bc56edSDimitry Andric   } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2128f22ef01cSRoman Divacky     NewOpc = ARM::t2LDRDi8;
2129f22ef01cSRoman Divacky     Scale = 4;
2130f22ef01cSRoman Divacky     isT2 = true;
2131f22ef01cSRoman Divacky   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2132f22ef01cSRoman Divacky     NewOpc = ARM::t2STRDi8;
2133f22ef01cSRoman Divacky     Scale = 4;
2134f22ef01cSRoman Divacky     isT2 = true;
213591bc56edSDimitry Andric   } else {
2136f22ef01cSRoman Divacky     return false;
213791bc56edSDimitry Andric   }
2138f22ef01cSRoman Divacky 
21392754fe60SDimitry Andric   // Make sure the base address satisfies i64 ld / st alignment requirement.
2140f785676fSDimitry Andric   // At the moment, we ignore the memoryoperand's value.
2141f785676fSDimitry Andric   // If we want to use AliasAnalysis, we should check it accordingly.
2142f22ef01cSRoman Divacky   if (!Op0->hasOneMemOperand() ||
2143f22ef01cSRoman Divacky       (*Op0->memoperands_begin())->isVolatile())
2144f22ef01cSRoman Divacky     return false;
2145f22ef01cSRoman Divacky 
2146f22ef01cSRoman Divacky   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
21472cab237bSDimitry Andric   const Function &Func = MF->getFunction();
2148f22ef01cSRoman Divacky   unsigned ReqAlign = STI->hasV6Ops()
21492cab237bSDimitry Andric     ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
2150f22ef01cSRoman Divacky     : 8;  // Pre-v6 need 8-byte align
2151f22ef01cSRoman Divacky   if (Align < ReqAlign)
2152f22ef01cSRoman Divacky     return false;
2153f22ef01cSRoman Divacky 
2154f22ef01cSRoman Divacky   // Then make sure the immediate offset fits.
21553ca95b02SDimitry Andric   int OffImm = getMemoryOpOffset(*Op0);
2156f22ef01cSRoman Divacky   if (isT2) {
2157f22ef01cSRoman Divacky     int Limit = (1 << 8) * Scale;
21583b0f4066SDimitry Andric     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2159f22ef01cSRoman Divacky       return false;
2160f22ef01cSRoman Divacky     Offset = OffImm;
2161f22ef01cSRoman Divacky   } else {
2162f22ef01cSRoman Divacky     ARM_AM::AddrOpc AddSub = ARM_AM::add;
2163f22ef01cSRoman Divacky     if (OffImm < 0) {
2164f22ef01cSRoman Divacky       AddSub = ARM_AM::sub;
2165f22ef01cSRoman Divacky       OffImm = - OffImm;
2166f22ef01cSRoman Divacky     }
2167f22ef01cSRoman Divacky     int Limit = (1 << 8) * Scale;
2168f22ef01cSRoman Divacky     if (OffImm >= Limit || (OffImm & (Scale-1)))
2169f22ef01cSRoman Divacky       return false;
2170f22ef01cSRoman Divacky     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2171f22ef01cSRoman Divacky   }
217297bc6c73SDimitry Andric   FirstReg = Op0->getOperand(0).getReg();
217397bc6c73SDimitry Andric   SecondReg = Op1->getOperand(0).getReg();
217497bc6c73SDimitry Andric   if (FirstReg == SecondReg)
2175f22ef01cSRoman Divacky     return false;
2176f22ef01cSRoman Divacky   BaseReg = Op0->getOperand(1).getReg();
21773ca95b02SDimitry Andric   Pred = getInstrPredicate(*Op0, PredReg);
2178f22ef01cSRoman Divacky   dl = Op0->getDebugLoc();
2179f22ef01cSRoman Divacky   return true;
2180f22ef01cSRoman Divacky }
2181f22ef01cSRoman Divacky 
RescheduleOps(MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Ops,unsigned Base,bool isLd,DenseMap<MachineInstr *,unsigned> & MI2LocMap)2182f22ef01cSRoman Divacky bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2183f785676fSDimitry Andric                                  SmallVectorImpl<MachineInstr *> &Ops,
2184f22ef01cSRoman Divacky                                  unsigned Base, bool isLd,
2185f22ef01cSRoman Divacky                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2186f22ef01cSRoman Divacky   bool RetVal = false;
2187f22ef01cSRoman Divacky 
2188f22ef01cSRoman Divacky   // Sort by offset (in reverse order).
2189*b5893f02SDimitry Andric   llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
21903ca95b02SDimitry Andric     int LOffset = getMemoryOpOffset(*LHS);
21913ca95b02SDimitry Andric     int ROffset = getMemoryOpOffset(*RHS);
219291bc56edSDimitry Andric     assert(LHS == RHS || LOffset != ROffset);
219391bc56edSDimitry Andric     return LOffset > ROffset;
219491bc56edSDimitry Andric   });
2195f22ef01cSRoman Divacky 
2196f22ef01cSRoman Divacky   // The loads / stores of the same base are in order. Scan them from first to
2197ffd1746dSEd Schouten   // last and check for the following:
2198f22ef01cSRoman Divacky   // 1. Any def of base.
2199f22ef01cSRoman Divacky   // 2. Any gaps.
2200f22ef01cSRoman Divacky   while (Ops.size() > 1) {
2201f22ef01cSRoman Divacky     unsigned FirstLoc = ~0U;
2202f22ef01cSRoman Divacky     unsigned LastLoc = 0;
220391bc56edSDimitry Andric     MachineInstr *FirstOp = nullptr;
220491bc56edSDimitry Andric     MachineInstr *LastOp = nullptr;
2205f22ef01cSRoman Divacky     int LastOffset = 0;
2206f22ef01cSRoman Divacky     unsigned LastOpcode = 0;
2207f22ef01cSRoman Divacky     unsigned LastBytes = 0;
2208f22ef01cSRoman Divacky     unsigned NumMove = 0;
2209f22ef01cSRoman Divacky     for (int i = Ops.size() - 1; i >= 0; --i) {
22107a7e6055SDimitry Andric       // Make sure each operation has the same kind.
2211f22ef01cSRoman Divacky       MachineInstr *Op = Ops[i];
22127a7e6055SDimitry Andric       unsigned LSMOpcode
22137a7e6055SDimitry Andric         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
22147a7e6055SDimitry Andric       if (LastOpcode && LSMOpcode != LastOpcode)
22157a7e6055SDimitry Andric         break;
22167a7e6055SDimitry Andric 
22177a7e6055SDimitry Andric       // Check that we have a continuous set of offsets.
22187a7e6055SDimitry Andric       int Offset = getMemoryOpOffset(*Op);
22197a7e6055SDimitry Andric       unsigned Bytes = getLSMultipleTransferSize(Op);
22207a7e6055SDimitry Andric       if (LastBytes) {
22217a7e6055SDimitry Andric         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
22227a7e6055SDimitry Andric           break;
22237a7e6055SDimitry Andric       }
22247a7e6055SDimitry Andric 
22257a7e6055SDimitry Andric       // Don't try to reschedule too many instructions.
22267a7e6055SDimitry Andric       if (NumMove == 8) // FIXME: Tune this limit.
22277a7e6055SDimitry Andric         break;
22287a7e6055SDimitry Andric 
22297a7e6055SDimitry Andric       // Found a mergable instruction; save information about it.
22307a7e6055SDimitry Andric       ++NumMove;
22317a7e6055SDimitry Andric       LastOffset = Offset;
22327a7e6055SDimitry Andric       LastBytes = Bytes;
22337a7e6055SDimitry Andric       LastOpcode = LSMOpcode;
22347a7e6055SDimitry Andric 
2235f22ef01cSRoman Divacky       unsigned Loc = MI2LocMap[Op];
2236f22ef01cSRoman Divacky       if (Loc <= FirstLoc) {
2237f22ef01cSRoman Divacky         FirstLoc = Loc;
2238f22ef01cSRoman Divacky         FirstOp = Op;
2239f22ef01cSRoman Divacky       }
2240f22ef01cSRoman Divacky       if (Loc >= LastLoc) {
2241f22ef01cSRoman Divacky         LastLoc = Loc;
2242f22ef01cSRoman Divacky         LastOp = Op;
2243f22ef01cSRoman Divacky       }
2244f22ef01cSRoman Divacky     }
2245f22ef01cSRoman Divacky 
2246f22ef01cSRoman Divacky     if (NumMove <= 1)
2247f22ef01cSRoman Divacky       Ops.pop_back();
2248f22ef01cSRoman Divacky     else {
2249f22ef01cSRoman Divacky       SmallPtrSet<MachineInstr*, 4> MemOps;
2250f22ef01cSRoman Divacky       SmallSet<unsigned, 4> MemRegs;
22517a7e6055SDimitry Andric       for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2252f22ef01cSRoman Divacky         MemOps.insert(Ops[i]);
2253f22ef01cSRoman Divacky         MemRegs.insert(Ops[i]->getOperand(0).getReg());
2254f22ef01cSRoman Divacky       }
2255f22ef01cSRoman Divacky 
2256f22ef01cSRoman Divacky       // Be conservative, if the instructions are too far apart, don't
2257f22ef01cSRoman Divacky       // move them. We want to limit the increase of register pressure.
2258f22ef01cSRoman Divacky       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2259f22ef01cSRoman Divacky       if (DoMove)
2260f22ef01cSRoman Divacky         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
22617a7e6055SDimitry Andric                                            MemOps, MemRegs, TRI, AA);
2262f22ef01cSRoman Divacky       if (!DoMove) {
2263f22ef01cSRoman Divacky         for (unsigned i = 0; i != NumMove; ++i)
2264f22ef01cSRoman Divacky           Ops.pop_back();
2265f22ef01cSRoman Divacky       } else {
2266f22ef01cSRoman Divacky         // This is the new location for the loads / stores.
2267f22ef01cSRoman Divacky         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
22683ca95b02SDimitry Andric         while (InsertPos != MBB->end() &&
22694ba319b5SDimitry Andric                (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2270f22ef01cSRoman Divacky           ++InsertPos;
2271f22ef01cSRoman Divacky 
2272f22ef01cSRoman Divacky         // If we are moving a pair of loads / stores, see if it makes sense
2273f22ef01cSRoman Divacky         // to try to allocate a pair of registers that can form register pairs.
2274f22ef01cSRoman Divacky         MachineInstr *Op0 = Ops.back();
2275f22ef01cSRoman Divacky         MachineInstr *Op1 = Ops[Ops.size()-2];
227697bc6c73SDimitry Andric         unsigned FirstReg = 0, SecondReg = 0;
22772754fe60SDimitry Andric         unsigned BaseReg = 0, PredReg = 0;
2278f22ef01cSRoman Divacky         ARMCC::CondCodes Pred = ARMCC::AL;
2279f22ef01cSRoman Divacky         bool isT2 = false;
2280f22ef01cSRoman Divacky         unsigned NewOpc = 0;
2281f22ef01cSRoman Divacky         int Offset = 0;
2282f22ef01cSRoman Divacky         DebugLoc dl;
2283f22ef01cSRoman Divacky         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
228497bc6c73SDimitry Andric                                              FirstReg, SecondReg, BaseReg,
2285f22ef01cSRoman Divacky                                              Offset, PredReg, Pred, isT2)) {
2286f22ef01cSRoman Divacky           Ops.pop_back();
2287f22ef01cSRoman Divacky           Ops.pop_back();
2288f22ef01cSRoman Divacky 
228917a519f9SDimitry Andric           const MCInstrDesc &MCID = TII->get(NewOpc);
22907ae0e2c9SDimitry Andric           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
229197bc6c73SDimitry Andric           MRI->constrainRegClass(FirstReg, TRC);
229297bc6c73SDimitry Andric           MRI->constrainRegClass(SecondReg, TRC);
2293bd5abe19SDimitry Andric 
2294f22ef01cSRoman Divacky           // Form the pair instruction.
2295f22ef01cSRoman Divacky           if (isLd) {
229617a519f9SDimitry Andric             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
229797bc6c73SDimitry Andric               .addReg(FirstReg, RegState::Define)
229897bc6c73SDimitry Andric               .addReg(SecondReg, RegState::Define)
2299f22ef01cSRoman Divacky               .addReg(BaseReg);
23002754fe60SDimitry Andric             // FIXME: We're converting from LDRi12 to an insn that still
23012754fe60SDimitry Andric             // uses addrmode2, so we need an explicit offset reg. It should
23022754fe60SDimitry Andric             // always by reg0 since we're transforming LDRi12s.
2303f22ef01cSRoman Divacky             if (!isT2)
23042754fe60SDimitry Andric               MIB.addReg(0);
2305f22ef01cSRoman Divacky             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2306*b5893f02SDimitry Andric             MIB.cloneMergedMemRefs({Op0, Op1});
23074ba319b5SDimitry Andric             LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2308f22ef01cSRoman Divacky             ++NumLDRDFormed;
2309f22ef01cSRoman Divacky           } else {
231017a519f9SDimitry Andric             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
231197bc6c73SDimitry Andric               .addReg(FirstReg)
231297bc6c73SDimitry Andric               .addReg(SecondReg)
2313f22ef01cSRoman Divacky               .addReg(BaseReg);
23142754fe60SDimitry Andric             // FIXME: We're converting from LDRi12 to an insn that still
23152754fe60SDimitry Andric             // uses addrmode2, so we need an explicit offset reg. It should
23162754fe60SDimitry Andric             // always by reg0 since we're transforming STRi12s.
2317f22ef01cSRoman Divacky             if (!isT2)
23182754fe60SDimitry Andric               MIB.addReg(0);
2319f22ef01cSRoman Divacky             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2320*b5893f02SDimitry Andric             MIB.cloneMergedMemRefs({Op0, Op1});
23214ba319b5SDimitry Andric             LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2322f22ef01cSRoman Divacky             ++NumSTRDFormed;
2323f22ef01cSRoman Divacky           }
2324f22ef01cSRoman Divacky           MBB->erase(Op0);
2325f22ef01cSRoman Divacky           MBB->erase(Op1);
2326f22ef01cSRoman Divacky 
232797bc6c73SDimitry Andric           if (!isT2) {
2328f22ef01cSRoman Divacky             // Add register allocation hints to form register pairs.
232997bc6c73SDimitry Andric             MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
233097bc6c73SDimitry Andric             MRI->setRegAllocationHint(SecondReg,  ARMRI::RegPairOdd, FirstReg);
233197bc6c73SDimitry Andric           }
2332f22ef01cSRoman Divacky         } else {
2333f22ef01cSRoman Divacky           for (unsigned i = 0; i != NumMove; ++i) {
2334f22ef01cSRoman Divacky             MachineInstr *Op = Ops.back();
2335f22ef01cSRoman Divacky             Ops.pop_back();
2336f22ef01cSRoman Divacky             MBB->splice(InsertPos, MBB, Op);
2337f22ef01cSRoman Divacky           }
2338f22ef01cSRoman Divacky         }
2339f22ef01cSRoman Divacky 
2340f22ef01cSRoman Divacky         NumLdStMoved += NumMove;
2341f22ef01cSRoman Divacky         RetVal = true;
2342f22ef01cSRoman Divacky       }
2343f22ef01cSRoman Divacky     }
2344f22ef01cSRoman Divacky   }
2345f22ef01cSRoman Divacky 
2346f22ef01cSRoman Divacky   return RetVal;
2347f22ef01cSRoman Divacky }
2348f22ef01cSRoman Divacky 
2349f22ef01cSRoman Divacky bool
RescheduleLoadStoreInstrs(MachineBasicBlock * MBB)2350f22ef01cSRoman Divacky ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2351f22ef01cSRoman Divacky   bool RetVal = false;
2352f22ef01cSRoman Divacky 
2353f22ef01cSRoman Divacky   DenseMap<MachineInstr*, unsigned> MI2LocMap;
2354f22ef01cSRoman Divacky   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
2355f22ef01cSRoman Divacky   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
2356f22ef01cSRoman Divacky   SmallVector<unsigned, 4> LdBases;
2357f22ef01cSRoman Divacky   SmallVector<unsigned, 4> StBases;
2358f22ef01cSRoman Divacky 
2359f22ef01cSRoman Divacky   unsigned Loc = 0;
2360f22ef01cSRoman Divacky   MachineBasicBlock::iterator MBBI = MBB->begin();
2361f22ef01cSRoman Divacky   MachineBasicBlock::iterator E = MBB->end();
2362f22ef01cSRoman Divacky   while (MBBI != E) {
2363f22ef01cSRoman Divacky     for (; MBBI != E; ++MBBI) {
23643ca95b02SDimitry Andric       MachineInstr &MI = *MBBI;
23653ca95b02SDimitry Andric       if (MI.isCall() || MI.isTerminator()) {
2366f22ef01cSRoman Divacky         // Stop at barriers.
2367f22ef01cSRoman Divacky         ++MBBI;
2368f22ef01cSRoman Divacky         break;
2369f22ef01cSRoman Divacky       }
2370f22ef01cSRoman Divacky 
23714ba319b5SDimitry Andric       if (!MI.isDebugInstr())
23723ca95b02SDimitry Andric         MI2LocMap[&MI] = ++Loc;
2373ffd1746dSEd Schouten 
23743ca95b02SDimitry Andric       if (!isMemoryOp(MI))
2375f22ef01cSRoman Divacky         continue;
2376f22ef01cSRoman Divacky       unsigned PredReg = 0;
2377dff0c46cSDimitry Andric       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2378f22ef01cSRoman Divacky         continue;
2379f22ef01cSRoman Divacky 
23803ca95b02SDimitry Andric       int Opc = MI.getOpcode();
2381875ed548SDimitry Andric       bool isLd = isLoadSingle(Opc);
23823ca95b02SDimitry Andric       unsigned Base = MI.getOperand(1).getReg();
2383f22ef01cSRoman Divacky       int Offset = getMemoryOpOffset(MI);
2384f22ef01cSRoman Divacky 
2385f22ef01cSRoman Divacky       bool StopHere = false;
2386f22ef01cSRoman Divacky       if (isLd) {
2387f22ef01cSRoman Divacky         DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
2388f22ef01cSRoman Divacky           Base2LdsMap.find(Base);
2389f22ef01cSRoman Divacky         if (BI != Base2LdsMap.end()) {
2390f22ef01cSRoman Divacky           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
23913ca95b02SDimitry Andric             if (Offset == getMemoryOpOffset(*BI->second[i])) {
2392f22ef01cSRoman Divacky               StopHere = true;
2393f22ef01cSRoman Divacky               break;
2394f22ef01cSRoman Divacky             }
2395f22ef01cSRoman Divacky           }
2396f22ef01cSRoman Divacky           if (!StopHere)
23973ca95b02SDimitry Andric             BI->second.push_back(&MI);
2398f22ef01cSRoman Divacky         } else {
23993ca95b02SDimitry Andric           Base2LdsMap[Base].push_back(&MI);
2400f22ef01cSRoman Divacky           LdBases.push_back(Base);
2401f22ef01cSRoman Divacky         }
2402f22ef01cSRoman Divacky       } else {
2403f22ef01cSRoman Divacky         DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
2404f22ef01cSRoman Divacky           Base2StsMap.find(Base);
2405f22ef01cSRoman Divacky         if (BI != Base2StsMap.end()) {
2406f22ef01cSRoman Divacky           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
24073ca95b02SDimitry Andric             if (Offset == getMemoryOpOffset(*BI->second[i])) {
2408f22ef01cSRoman Divacky               StopHere = true;
2409f22ef01cSRoman Divacky               break;
2410f22ef01cSRoman Divacky             }
2411f22ef01cSRoman Divacky           }
2412f22ef01cSRoman Divacky           if (!StopHere)
24133ca95b02SDimitry Andric             BI->second.push_back(&MI);
2414f22ef01cSRoman Divacky         } else {
24153ca95b02SDimitry Andric           Base2StsMap[Base].push_back(&MI);
2416f22ef01cSRoman Divacky           StBases.push_back(Base);
2417f22ef01cSRoman Divacky         }
2418f22ef01cSRoman Divacky       }
2419f22ef01cSRoman Divacky 
2420f22ef01cSRoman Divacky       if (StopHere) {
2421f22ef01cSRoman Divacky         // Found a duplicate (a base+offset combination that's seen earlier).
2422f22ef01cSRoman Divacky         // Backtrack.
2423f22ef01cSRoman Divacky         --Loc;
2424f22ef01cSRoman Divacky         break;
2425f22ef01cSRoman Divacky       }
2426f22ef01cSRoman Divacky     }
2427f22ef01cSRoman Divacky 
2428f22ef01cSRoman Divacky     // Re-schedule loads.
2429f22ef01cSRoman Divacky     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2430f22ef01cSRoman Divacky       unsigned Base = LdBases[i];
2431f785676fSDimitry Andric       SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2432f22ef01cSRoman Divacky       if (Lds.size() > 1)
2433f22ef01cSRoman Divacky         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2434f22ef01cSRoman Divacky     }
2435f22ef01cSRoman Divacky 
2436f22ef01cSRoman Divacky     // Re-schedule stores.
2437f22ef01cSRoman Divacky     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2438f22ef01cSRoman Divacky       unsigned Base = StBases[i];
2439f785676fSDimitry Andric       SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2440f22ef01cSRoman Divacky       if (Sts.size() > 1)
2441f22ef01cSRoman Divacky         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2442f22ef01cSRoman Divacky     }
2443f22ef01cSRoman Divacky 
2444f22ef01cSRoman Divacky     if (MBBI != E) {
2445f22ef01cSRoman Divacky       Base2LdsMap.clear();
2446f22ef01cSRoman Divacky       Base2StsMap.clear();
2447f22ef01cSRoman Divacky       LdBases.clear();
2448f22ef01cSRoman Divacky       StBases.clear();
2449f22ef01cSRoman Divacky     }
2450f22ef01cSRoman Divacky   }
2451f22ef01cSRoman Divacky 
2452f22ef01cSRoman Divacky   return RetVal;
2453f22ef01cSRoman Divacky }
2454f22ef01cSRoman Divacky 
245597bc6c73SDimitry Andric /// Returns an instance of the load / store optimization pass.
createARMLoadStoreOptimizationPass(bool PreAlloc)2456f22ef01cSRoman Divacky FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2457f22ef01cSRoman Divacky   if (PreAlloc)
2458f22ef01cSRoman Divacky     return new ARMPreAllocLoadStoreOpt();
2459f22ef01cSRoman Divacky   return new ARMLoadStoreOpt();
2460f22ef01cSRoman Divacky }
2461