12cab237bSDimitry Andric //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
2f22ef01cSRoman Divacky //
3f22ef01cSRoman Divacky // The LLVM Compiler Infrastructure
4f22ef01cSRoman Divacky //
5f22ef01cSRoman Divacky // This file is distributed under the University of Illinois Open Source
6f22ef01cSRoman Divacky // License. See LICENSE.TXT for details.
7f22ef01cSRoman Divacky //
8f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
9f22ef01cSRoman Divacky //
1097bc6c73SDimitry Andric /// \file This file contains a pass that performs load / store related peephole
1197bc6c73SDimitry Andric /// optimizations. This pass should be run after register allocation.
12f22ef01cSRoman Divacky //
13f22ef01cSRoman Divacky //===----------------------------------------------------------------------===//
14f22ef01cSRoman Divacky
15f22ef01cSRoman Divacky #include "ARM.h"
16f22ef01cSRoman Divacky #include "ARMBaseInstrInfo.h"
17dff0c46cSDimitry Andric #include "ARMBaseRegisterInfo.h"
1891bc56edSDimitry Andric #include "ARMISelLowering.h"
19f22ef01cSRoman Divacky #include "ARMMachineFunctionInfo.h"
2091bc56edSDimitry Andric #include "ARMSubtarget.h"
216122f3e6SDimitry Andric #include "MCTargetDesc/ARMAddressingModes.h"
222cab237bSDimitry Andric #include "MCTargetDesc/ARMBaseInfo.h"
232cab237bSDimitry Andric #include "Utils/ARMBaseInfo.h"
242cab237bSDimitry Andric #include "llvm/ADT/ArrayRef.h"
25139f7f9bSDimitry Andric #include "llvm/ADT/DenseMap.h"
262cab237bSDimitry Andric #include "llvm/ADT/DenseSet.h"
27139f7f9bSDimitry Andric #include "llvm/ADT/STLExtras.h"
28139f7f9bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
29139f7f9bSDimitry Andric #include "llvm/ADT/SmallSet.h"
30139f7f9bSDimitry Andric #include "llvm/ADT/SmallVector.h"
31139f7f9bSDimitry Andric #include "llvm/ADT/Statistic.h"
322cab237bSDimitry Andric #include "llvm/ADT/iterator_range.h"
332cab237bSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
34db17bf38SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h"
35f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineBasicBlock.h"
362cab237bSDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
37f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineFunctionPass.h"
38f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineInstr.h"
39f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineInstrBuilder.h"
402cab237bSDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
412cab237bSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
42f22ef01cSRoman Divacky #include "llvm/CodeGen/MachineRegisterInfo.h"
43875ed548SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
442cab237bSDimitry Andric #include "llvm/CodeGen/TargetFrameLowering.h"
452cab237bSDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
462cab237bSDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
472cab237bSDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
482cab237bSDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
49139f7f9bSDimitry Andric #include "llvm/IR/DataLayout.h"
502cab237bSDimitry Andric #include "llvm/IR/DebugLoc.h"
51139f7f9bSDimitry Andric #include "llvm/IR/DerivedTypes.h"
52139f7f9bSDimitry Andric #include "llvm/IR/Function.h"
532cab237bSDimitry Andric #include "llvm/IR/Type.h"
542cab237bSDimitry Andric #include "llvm/MC/MCInstrDesc.h"
552cab237bSDimitry Andric #include "llvm/Pass.h"
56875ed548SDimitry Andric #include "llvm/Support/Allocator.h"
572cab237bSDimitry Andric #include "llvm/Support/CommandLine.h"
58139f7f9bSDimitry Andric #include "llvm/Support/Debug.h"
59139f7f9bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
60ff0cc061SDimitry Andric #include "llvm/Support/raw_ostream.h"
612cab237bSDimitry Andric #include <algorithm>
622cab237bSDimitry Andric #include <cassert>
632cab237bSDimitry Andric #include <cstddef>
642cab237bSDimitry Andric #include <cstdlib>
652cab237bSDimitry Andric #include <iterator>
662cab237bSDimitry Andric #include <limits>
672cab237bSDimitry Andric #include <utility>
682cab237bSDimitry Andric
69f22ef01cSRoman Divacky using namespace llvm;
70f22ef01cSRoman Divacky
7191bc56edSDimitry Andric #define DEBUG_TYPE "arm-ldst-opt"
7291bc56edSDimitry Andric
73f22ef01cSRoman Divacky STATISTIC(NumLDMGened , "Number of ldm instructions generated");
74f22ef01cSRoman Divacky STATISTIC(NumSTMGened , "Number of stm instructions generated");
75f22ef01cSRoman Divacky STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
76f22ef01cSRoman Divacky STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
77f22ef01cSRoman Divacky STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
78f22ef01cSRoman Divacky STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
79f22ef01cSRoman Divacky STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
80f22ef01cSRoman Divacky STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
81f22ef01cSRoman Divacky STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
82f22ef01cSRoman Divacky STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
83f22ef01cSRoman Divacky STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
84f22ef01cSRoman Divacky
853ca95b02SDimitry Andric /// This switch disables formation of double/multi instructions that could
863ca95b02SDimitry Andric /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
873ca95b02SDimitry Andric /// disabled. This can be used to create libraries that are robust even when
883ca95b02SDimitry Andric /// users provoke undefined behaviour by supplying misaligned pointers.
893ca95b02SDimitry Andric /// \see mayCombineMisaligned()
903ca95b02SDimitry Andric static cl::opt<bool>
913ca95b02SDimitry Andric AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
923ca95b02SDimitry Andric cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
937d523365SDimitry Andric
947d523365SDimitry Andric #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
957d523365SDimitry Andric
96f22ef01cSRoman Divacky namespace {
972cab237bSDimitry Andric
9897bc6c73SDimitry Andric /// Post- register allocation pass the combine load / store instructions to
9997bc6c73SDimitry Andric /// form ldm / stm instructions.
100f22ef01cSRoman Divacky struct ARMLoadStoreOpt : public MachineFunctionPass {
101f22ef01cSRoman Divacky static char ID;
102f22ef01cSRoman Divacky
103875ed548SDimitry Andric const MachineFunction *MF;
104f22ef01cSRoman Divacky const TargetInstrInfo *TII;
105f22ef01cSRoman Divacky const TargetRegisterInfo *TRI;
106dff0c46cSDimitry Andric const ARMSubtarget *STI;
10791bc56edSDimitry Andric const TargetLowering *TL;
108f22ef01cSRoman Divacky ARMFunctionInfo *AFI;
109875ed548SDimitry Andric LivePhysRegs LiveRegs;
110875ed548SDimitry Andric RegisterClassInfo RegClassInfo;
111875ed548SDimitry Andric MachineBasicBlock::const_iterator LiveRegPos;
112875ed548SDimitry Andric bool LiveRegsValid;
113875ed548SDimitry Andric bool RegClassInfoValid;
11491bc56edSDimitry Andric bool isThumb1, isThumb2;
115f22ef01cSRoman Divacky
ARMLoadStoreOpt__anon9023e32a0111::ARMLoadStoreOpt1162cab237bSDimitry Andric ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
1172cab237bSDimitry Andric
11891bc56edSDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override;
119f22ef01cSRoman Divacky
getRequiredProperties__anon9023e32a0111::ARMLoadStoreOpt1203ca95b02SDimitry Andric MachineFunctionProperties getRequiredProperties() const override {
1213ca95b02SDimitry Andric return MachineFunctionProperties().set(
122d88c1a5aSDimitry Andric MachineFunctionProperties::Property::NoVRegs);
1233ca95b02SDimitry Andric }
1243ca95b02SDimitry Andric
getPassName__anon9023e32a0111::ARMLoadStoreOpt125d88c1a5aSDimitry Andric StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
126f22ef01cSRoman Divacky
127f22ef01cSRoman Divacky private:
128875ed548SDimitry Andric /// A set of load/store MachineInstrs with same base register sorted by
129875ed548SDimitry Andric /// offset.
130f22ef01cSRoman Divacky struct MemOpQueueEntry {
131875ed548SDimitry Andric MachineInstr *MI;
132875ed548SDimitry Andric int Offset; ///< Load/Store offset.
133875ed548SDimitry Andric unsigned Position; ///< Position as counted from end of basic block.
1342cab237bSDimitry Andric
MemOpQueueEntry__anon9023e32a0111::ARMLoadStoreOpt::MemOpQueueEntry1353ca95b02SDimitry Andric MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
1363ca95b02SDimitry Andric : MI(&MI), Offset(Offset), Position(Position) {}
137f22ef01cSRoman Divacky };
1382cab237bSDimitry Andric using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
139f22ef01cSRoman Divacky
140875ed548SDimitry Andric /// A set of MachineInstrs that fulfill (nearly all) conditions to get
141875ed548SDimitry Andric /// merged into a LDM/STM.
142875ed548SDimitry Andric struct MergeCandidate {
143875ed548SDimitry Andric /// List of instructions ordered by load/store offset.
144875ed548SDimitry Andric SmallVector<MachineInstr*, 4> Instrs;
1452cab237bSDimitry Andric
146875ed548SDimitry Andric /// Index in Instrs of the instruction being latest in the schedule.
147875ed548SDimitry Andric unsigned LatestMIIdx;
1482cab237bSDimitry Andric
149875ed548SDimitry Andric /// Index in Instrs of the instruction being earliest in the schedule.
150875ed548SDimitry Andric unsigned EarliestMIIdx;
1512cab237bSDimitry Andric
152875ed548SDimitry Andric /// Index into the basic block where the merged instruction will be
153875ed548SDimitry Andric /// inserted. (See MemOpQueueEntry.Position)
154875ed548SDimitry Andric unsigned InsertPos;
1552cab237bSDimitry Andric
156875ed548SDimitry Andric /// Whether the instructions can be merged into a ldm/stm instruction.
157875ed548SDimitry Andric bool CanMergeToLSMulti;
1582cab237bSDimitry Andric
159875ed548SDimitry Andric /// Whether the instructions can be merged into a ldrd/strd instruction.
160875ed548SDimitry Andric bool CanMergeToLSDouble;
161875ed548SDimitry Andric };
162875ed548SDimitry Andric SpecificBumpPtrAllocator<MergeCandidate> Allocator;
163875ed548SDimitry Andric SmallVector<const MergeCandidate*,4> Candidates;
1647d523365SDimitry Andric SmallVector<MachineInstr*,4> MergeBaseCandidates;
165875ed548SDimitry Andric
166875ed548SDimitry Andric void moveLiveRegsBefore(const MachineBasicBlock &MBB,
167875ed548SDimitry Andric MachineBasicBlock::const_iterator Before);
168875ed548SDimitry Andric unsigned findFreeReg(const TargetRegisterClass &RegClass);
16991bc56edSDimitry Andric void UpdateBaseRegUses(MachineBasicBlock &MBB,
1703ca95b02SDimitry Andric MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1713ca95b02SDimitry Andric unsigned Base, unsigned WordOffset,
17291bc56edSDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg);
1733ca95b02SDimitry Andric MachineInstr *CreateLoadStoreMulti(
1743ca95b02SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
1753ca95b02SDimitry Andric int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
1763ca95b02SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
1773ca95b02SDimitry Andric ArrayRef<std::pair<unsigned, bool>> Regs);
1783ca95b02SDimitry Andric MachineInstr *CreateLoadStoreDouble(
1793ca95b02SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
1803ca95b02SDimitry Andric int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
1813ca95b02SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
1823ca95b02SDimitry Andric ArrayRef<std::pair<unsigned, bool>> Regs) const;
183875ed548SDimitry Andric void FormCandidates(const MemOpQueue &MemOps);
184875ed548SDimitry Andric MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
185f22ef01cSRoman Divacky bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
186f22ef01cSRoman Divacky MachineBasicBlock::iterator &MBBI);
187875ed548SDimitry Andric bool MergeBaseUpdateLoadStore(MachineInstr *MI);
188875ed548SDimitry Andric bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
1897d523365SDimitry Andric bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
190f22ef01cSRoman Divacky bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
191f22ef01cSRoman Divacky bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
1927d523365SDimitry Andric bool CombineMovBx(MachineBasicBlock &MBB);
193f22ef01cSRoman Divacky };
1942cab237bSDimitry Andric
1952cab237bSDimitry Andric } // end anonymous namespace
1962cab237bSDimitry Andric
197f22ef01cSRoman Divacky char ARMLoadStoreOpt::ID = 0;
198f22ef01cSRoman Divacky
1993ca95b02SDimitry Andric INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
2003ca95b02SDimitry Andric false)
2017d523365SDimitry Andric
definesCPSR(const MachineInstr & MI)2023ca95b02SDimitry Andric static bool definesCPSR(const MachineInstr &MI) {
2033ca95b02SDimitry Andric for (const auto &MO : MI.operands()) {
20439d628a0SDimitry Andric if (!MO.isReg())
20539d628a0SDimitry Andric continue;
20639d628a0SDimitry Andric if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
20739d628a0SDimitry Andric // If the instruction has live CPSR def, then it's not safe to fold it
20839d628a0SDimitry Andric // into load / store.
20939d628a0SDimitry Andric return true;
21039d628a0SDimitry Andric }
21139d628a0SDimitry Andric
21239d628a0SDimitry Andric return false;
21339d628a0SDimitry Andric }
21439d628a0SDimitry Andric
getMemoryOpOffset(const MachineInstr & MI)2153ca95b02SDimitry Andric static int getMemoryOpOffset(const MachineInstr &MI) {
2163ca95b02SDimitry Andric unsigned Opcode = MI.getOpcode();
21739d628a0SDimitry Andric bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
2183ca95b02SDimitry Andric unsigned NumOperands = MI.getDesc().getNumOperands();
2193ca95b02SDimitry Andric unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
22039d628a0SDimitry Andric
22139d628a0SDimitry Andric if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
22239d628a0SDimitry Andric Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
22339d628a0SDimitry Andric Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
22439d628a0SDimitry Andric Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
22539d628a0SDimitry Andric return OffField;
22639d628a0SDimitry Andric
22739d628a0SDimitry Andric // Thumb1 immediate offsets are scaled by 4
228ff0cc061SDimitry Andric if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
229ff0cc061SDimitry Andric Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
23039d628a0SDimitry Andric return OffField * 4;
23139d628a0SDimitry Andric
23239d628a0SDimitry Andric int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
23339d628a0SDimitry Andric : ARM_AM::getAM5Offset(OffField) * 4;
23439d628a0SDimitry Andric ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
23539d628a0SDimitry Andric : ARM_AM::getAM5Op(OffField);
23639d628a0SDimitry Andric
23739d628a0SDimitry Andric if (Op == ARM_AM::sub)
23839d628a0SDimitry Andric return -Offset;
23939d628a0SDimitry Andric
24039d628a0SDimitry Andric return Offset;
24139d628a0SDimitry Andric }
24239d628a0SDimitry Andric
getLoadStoreBaseOp(const MachineInstr & MI)243875ed548SDimitry Andric static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
244875ed548SDimitry Andric return MI.getOperand(1);
245875ed548SDimitry Andric }
246875ed548SDimitry Andric
getLoadStoreRegOp(const MachineInstr & MI)247875ed548SDimitry Andric static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
248875ed548SDimitry Andric return MI.getOperand(0);
249875ed548SDimitry Andric }
250875ed548SDimitry Andric
getLoadStoreMultipleOpcode(unsigned Opcode,ARM_AM::AMSubMode Mode)251ff0cc061SDimitry Andric static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
252f22ef01cSRoman Divacky switch (Opcode) {
2532754fe60SDimitry Andric default: llvm_unreachable("Unhandled opcode!");
2542754fe60SDimitry Andric case ARM::LDRi12:
255ffd1746dSEd Schouten ++NumLDMGened;
2562754fe60SDimitry Andric switch (Mode) {
2572754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
2582754fe60SDimitry Andric case ARM_AM::ia: return ARM::LDMIA;
2592754fe60SDimitry Andric case ARM_AM::da: return ARM::LDMDA;
2602754fe60SDimitry Andric case ARM_AM::db: return ARM::LDMDB;
2612754fe60SDimitry Andric case ARM_AM::ib: return ARM::LDMIB;
2622754fe60SDimitry Andric }
2632754fe60SDimitry Andric case ARM::STRi12:
264ffd1746dSEd Schouten ++NumSTMGened;
2652754fe60SDimitry Andric switch (Mode) {
2662754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
2672754fe60SDimitry Andric case ARM_AM::ia: return ARM::STMIA;
2682754fe60SDimitry Andric case ARM_AM::da: return ARM::STMDA;
2692754fe60SDimitry Andric case ARM_AM::db: return ARM::STMDB;
2702754fe60SDimitry Andric case ARM_AM::ib: return ARM::STMIB;
2712754fe60SDimitry Andric }
27291bc56edSDimitry Andric case ARM::tLDRi:
273ff0cc061SDimitry Andric case ARM::tLDRspi:
27491bc56edSDimitry Andric // tLDMIA is writeback-only - unless the base register is in the input
27591bc56edSDimitry Andric // reglist.
27691bc56edSDimitry Andric ++NumLDMGened;
27791bc56edSDimitry Andric switch (Mode) {
27891bc56edSDimitry Andric default: llvm_unreachable("Unhandled submode!");
27991bc56edSDimitry Andric case ARM_AM::ia: return ARM::tLDMIA;
28091bc56edSDimitry Andric }
28191bc56edSDimitry Andric case ARM::tSTRi:
282ff0cc061SDimitry Andric case ARM::tSTRspi:
28391bc56edSDimitry Andric // There is no non-writeback tSTMIA either.
28491bc56edSDimitry Andric ++NumSTMGened;
28591bc56edSDimitry Andric switch (Mode) {
28691bc56edSDimitry Andric default: llvm_unreachable("Unhandled submode!");
28791bc56edSDimitry Andric case ARM_AM::ia: return ARM::tSTMIA_UPD;
28891bc56edSDimitry Andric }
289f22ef01cSRoman Divacky case ARM::t2LDRi8:
290f22ef01cSRoman Divacky case ARM::t2LDRi12:
291ffd1746dSEd Schouten ++NumLDMGened;
2922754fe60SDimitry Andric switch (Mode) {
2932754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
2942754fe60SDimitry Andric case ARM_AM::ia: return ARM::t2LDMIA;
2952754fe60SDimitry Andric case ARM_AM::db: return ARM::t2LDMDB;
2962754fe60SDimitry Andric }
297f22ef01cSRoman Divacky case ARM::t2STRi8:
298f22ef01cSRoman Divacky case ARM::t2STRi12:
299ffd1746dSEd Schouten ++NumSTMGened;
3002754fe60SDimitry Andric switch (Mode) {
3012754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
3022754fe60SDimitry Andric case ARM_AM::ia: return ARM::t2STMIA;
3032754fe60SDimitry Andric case ARM_AM::db: return ARM::t2STMDB;
3042754fe60SDimitry Andric }
305f22ef01cSRoman Divacky case ARM::VLDRS:
306ffd1746dSEd Schouten ++NumVLDMGened;
3072754fe60SDimitry Andric switch (Mode) {
3082754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
3092754fe60SDimitry Andric case ARM_AM::ia: return ARM::VLDMSIA;
3103b0f4066SDimitry Andric case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
3112754fe60SDimitry Andric }
312f22ef01cSRoman Divacky case ARM::VSTRS:
313ffd1746dSEd Schouten ++NumVSTMGened;
3142754fe60SDimitry Andric switch (Mode) {
3152754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
3162754fe60SDimitry Andric case ARM_AM::ia: return ARM::VSTMSIA;
3173b0f4066SDimitry Andric case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
3182754fe60SDimitry Andric }
319f22ef01cSRoman Divacky case ARM::VLDRD:
320ffd1746dSEd Schouten ++NumVLDMGened;
3212754fe60SDimitry Andric switch (Mode) {
3222754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
3232754fe60SDimitry Andric case ARM_AM::ia: return ARM::VLDMDIA;
3243b0f4066SDimitry Andric case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
3252754fe60SDimitry Andric }
326f22ef01cSRoman Divacky case ARM::VSTRD:
327ffd1746dSEd Schouten ++NumVSTMGened;
3282754fe60SDimitry Andric switch (Mode) {
3292754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
3302754fe60SDimitry Andric case ARM_AM::ia: return ARM::VSTMDIA;
3313b0f4066SDimitry Andric case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
332f22ef01cSRoman Divacky }
3332754fe60SDimitry Andric }
334f22ef01cSRoman Divacky }
335f22ef01cSRoman Divacky
getLoadStoreMultipleSubMode(unsigned Opcode)33697bc6c73SDimitry Andric static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
3372754fe60SDimitry Andric switch (Opcode) {
3382754fe60SDimitry Andric default: llvm_unreachable("Unhandled opcode!");
3392754fe60SDimitry Andric case ARM::LDMIA_RET:
3402754fe60SDimitry Andric case ARM::LDMIA:
3412754fe60SDimitry Andric case ARM::LDMIA_UPD:
3422754fe60SDimitry Andric case ARM::STMIA:
3432754fe60SDimitry Andric case ARM::STMIA_UPD:
34491bc56edSDimitry Andric case ARM::tLDMIA:
34591bc56edSDimitry Andric case ARM::tLDMIA_UPD:
34691bc56edSDimitry Andric case ARM::tSTMIA_UPD:
3472754fe60SDimitry Andric case ARM::t2LDMIA_RET:
3482754fe60SDimitry Andric case ARM::t2LDMIA:
3492754fe60SDimitry Andric case ARM::t2LDMIA_UPD:
3502754fe60SDimitry Andric case ARM::t2STMIA:
3512754fe60SDimitry Andric case ARM::t2STMIA_UPD:
3522754fe60SDimitry Andric case ARM::VLDMSIA:
3532754fe60SDimitry Andric case ARM::VLDMSIA_UPD:
3542754fe60SDimitry Andric case ARM::VSTMSIA:
3552754fe60SDimitry Andric case ARM::VSTMSIA_UPD:
3562754fe60SDimitry Andric case ARM::VLDMDIA:
3572754fe60SDimitry Andric case ARM::VLDMDIA_UPD:
3582754fe60SDimitry Andric case ARM::VSTMDIA:
3592754fe60SDimitry Andric case ARM::VSTMDIA_UPD:
3602754fe60SDimitry Andric return ARM_AM::ia;
3612754fe60SDimitry Andric
3622754fe60SDimitry Andric case ARM::LDMDA:
3632754fe60SDimitry Andric case ARM::LDMDA_UPD:
3642754fe60SDimitry Andric case ARM::STMDA:
3652754fe60SDimitry Andric case ARM::STMDA_UPD:
3662754fe60SDimitry Andric return ARM_AM::da;
3672754fe60SDimitry Andric
3682754fe60SDimitry Andric case ARM::LDMDB:
3692754fe60SDimitry Andric case ARM::LDMDB_UPD:
3702754fe60SDimitry Andric case ARM::STMDB:
3712754fe60SDimitry Andric case ARM::STMDB_UPD:
3722754fe60SDimitry Andric case ARM::t2LDMDB:
3732754fe60SDimitry Andric case ARM::t2LDMDB_UPD:
3742754fe60SDimitry Andric case ARM::t2STMDB:
3752754fe60SDimitry Andric case ARM::t2STMDB_UPD:
3762754fe60SDimitry Andric case ARM::VLDMSDB_UPD:
3772754fe60SDimitry Andric case ARM::VSTMSDB_UPD:
3782754fe60SDimitry Andric case ARM::VLDMDDB_UPD:
3792754fe60SDimitry Andric case ARM::VSTMDDB_UPD:
3802754fe60SDimitry Andric return ARM_AM::db;
3812754fe60SDimitry Andric
3822754fe60SDimitry Andric case ARM::LDMIB:
3832754fe60SDimitry Andric case ARM::LDMIB_UPD:
3842754fe60SDimitry Andric case ARM::STMIB:
3852754fe60SDimitry Andric case ARM::STMIB_UPD:
3862754fe60SDimitry Andric return ARM_AM::ib;
3872754fe60SDimitry Andric }
3882754fe60SDimitry Andric }
3892754fe60SDimitry Andric
isT1i32Load(unsigned Opc)39091bc56edSDimitry Andric static bool isT1i32Load(unsigned Opc) {
391ff0cc061SDimitry Andric return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
39291bc56edSDimitry Andric }
39391bc56edSDimitry Andric
isT2i32Load(unsigned Opc)394f22ef01cSRoman Divacky static bool isT2i32Load(unsigned Opc) {
395f22ef01cSRoman Divacky return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
396f22ef01cSRoman Divacky }
397f22ef01cSRoman Divacky
isi32Load(unsigned Opc)398f22ef01cSRoman Divacky static bool isi32Load(unsigned Opc) {
39991bc56edSDimitry Andric return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
40091bc56edSDimitry Andric }
40191bc56edSDimitry Andric
isT1i32Store(unsigned Opc)40291bc56edSDimitry Andric static bool isT1i32Store(unsigned Opc) {
403ff0cc061SDimitry Andric return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
404f22ef01cSRoman Divacky }
405f22ef01cSRoman Divacky
isT2i32Store(unsigned Opc)406f22ef01cSRoman Divacky static bool isT2i32Store(unsigned Opc) {
407f22ef01cSRoman Divacky return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
408f22ef01cSRoman Divacky }
409f22ef01cSRoman Divacky
isi32Store(unsigned Opc)410f22ef01cSRoman Divacky static bool isi32Store(unsigned Opc) {
41191bc56edSDimitry Andric return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
41291bc56edSDimitry Andric }
41391bc56edSDimitry Andric
isLoadSingle(unsigned Opc)414875ed548SDimitry Andric static bool isLoadSingle(unsigned Opc) {
415875ed548SDimitry Andric return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
416875ed548SDimitry Andric }
417875ed548SDimitry Andric
getImmScale(unsigned Opc)41891bc56edSDimitry Andric static unsigned getImmScale(unsigned Opc) {
41991bc56edSDimitry Andric switch (Opc) {
42091bc56edSDimitry Andric default: llvm_unreachable("Unhandled opcode!");
42191bc56edSDimitry Andric case ARM::tLDRi:
42291bc56edSDimitry Andric case ARM::tSTRi:
423ff0cc061SDimitry Andric case ARM::tLDRspi:
424ff0cc061SDimitry Andric case ARM::tSTRspi:
42591bc56edSDimitry Andric return 1;
42691bc56edSDimitry Andric case ARM::tLDRHi:
42791bc56edSDimitry Andric case ARM::tSTRHi:
42891bc56edSDimitry Andric return 2;
42991bc56edSDimitry Andric case ARM::tLDRBi:
43091bc56edSDimitry Andric case ARM::tSTRBi:
43191bc56edSDimitry Andric return 4;
43291bc56edSDimitry Andric }
43391bc56edSDimitry Andric }
43491bc56edSDimitry Andric
getLSMultipleTransferSize(const MachineInstr * MI)435875ed548SDimitry Andric static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
436f22ef01cSRoman Divacky switch (MI->getOpcode()) {
437f22ef01cSRoman Divacky default: return 0;
4382754fe60SDimitry Andric case ARM::LDRi12:
4392754fe60SDimitry Andric case ARM::STRi12:
44091bc56edSDimitry Andric case ARM::tLDRi:
44191bc56edSDimitry Andric case ARM::tSTRi:
442ff0cc061SDimitry Andric case ARM::tLDRspi:
443ff0cc061SDimitry Andric case ARM::tSTRspi:
444f22ef01cSRoman Divacky case ARM::t2LDRi8:
445f22ef01cSRoman Divacky case ARM::t2LDRi12:
446f22ef01cSRoman Divacky case ARM::t2STRi8:
447f22ef01cSRoman Divacky case ARM::t2STRi12:
448f22ef01cSRoman Divacky case ARM::VLDRS:
449f22ef01cSRoman Divacky case ARM::VSTRS:
450f22ef01cSRoman Divacky return 4;
451f22ef01cSRoman Divacky case ARM::VLDRD:
452f22ef01cSRoman Divacky case ARM::VSTRD:
453f22ef01cSRoman Divacky return 8;
4542754fe60SDimitry Andric case ARM::LDMIA:
4552754fe60SDimitry Andric case ARM::LDMDA:
4562754fe60SDimitry Andric case ARM::LDMDB:
4572754fe60SDimitry Andric case ARM::LDMIB:
4582754fe60SDimitry Andric case ARM::STMIA:
4592754fe60SDimitry Andric case ARM::STMDA:
4602754fe60SDimitry Andric case ARM::STMDB:
4612754fe60SDimitry Andric case ARM::STMIB:
46291bc56edSDimitry Andric case ARM::tLDMIA:
46391bc56edSDimitry Andric case ARM::tLDMIA_UPD:
46491bc56edSDimitry Andric case ARM::tSTMIA_UPD:
4652754fe60SDimitry Andric case ARM::t2LDMIA:
4662754fe60SDimitry Andric case ARM::t2LDMDB:
4672754fe60SDimitry Andric case ARM::t2STMIA:
4682754fe60SDimitry Andric case ARM::t2STMDB:
4692754fe60SDimitry Andric case ARM::VLDMSIA:
4702754fe60SDimitry Andric case ARM::VSTMSIA:
4712754fe60SDimitry Andric return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
4722754fe60SDimitry Andric case ARM::VLDMDIA:
4732754fe60SDimitry Andric case ARM::VSTMDIA:
4742754fe60SDimitry Andric return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
475f22ef01cSRoman Divacky }
476f22ef01cSRoman Divacky }
477f22ef01cSRoman Divacky
478875ed548SDimitry Andric /// Update future uses of the base register with the offset introduced
479875ed548SDimitry Andric /// due to writeback. This function only works on Thumb1.
UpdateBaseRegUses(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,unsigned Base,unsigned WordOffset,ARMCC::CondCodes Pred,unsigned PredReg)4803ca95b02SDimitry Andric void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
481875ed548SDimitry Andric MachineBasicBlock::iterator MBBI,
4823ca95b02SDimitry Andric const DebugLoc &DL, unsigned Base,
483875ed548SDimitry Andric unsigned WordOffset,
4843ca95b02SDimitry Andric ARMCC::CondCodes Pred,
4853ca95b02SDimitry Andric unsigned PredReg) {
486875ed548SDimitry Andric assert(isThumb1 && "Can only update base register uses for Thumb1!");
487875ed548SDimitry Andric // Start updating any instructions with immediate offsets. Insert a SUB before
488875ed548SDimitry Andric // the first non-updateable instruction (if any).
489875ed548SDimitry Andric for (; MBBI != MBB.end(); ++MBBI) {
490875ed548SDimitry Andric bool InsertSub = false;
491875ed548SDimitry Andric unsigned Opc = MBBI->getOpcode();
492875ed548SDimitry Andric
493875ed548SDimitry Andric if (MBBI->readsRegister(Base)) {
494875ed548SDimitry Andric int Offset;
495875ed548SDimitry Andric bool IsLoad =
496875ed548SDimitry Andric Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
497875ed548SDimitry Andric bool IsStore =
498875ed548SDimitry Andric Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
499875ed548SDimitry Andric
500875ed548SDimitry Andric if (IsLoad || IsStore) {
501875ed548SDimitry Andric // Loads and stores with immediate offsets can be updated, but only if
502875ed548SDimitry Andric // the new offset isn't negative.
503875ed548SDimitry Andric // The MachineOperand containing the offset immediate is the last one
504875ed548SDimitry Andric // before predicates.
505875ed548SDimitry Andric MachineOperand &MO =
506875ed548SDimitry Andric MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
507875ed548SDimitry Andric // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
508875ed548SDimitry Andric Offset = MO.getImm() - WordOffset * getImmScale(Opc);
509875ed548SDimitry Andric
510875ed548SDimitry Andric // If storing the base register, it needs to be reset first.
511875ed548SDimitry Andric unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
512875ed548SDimitry Andric
513875ed548SDimitry Andric if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
514875ed548SDimitry Andric MO.setImm(Offset);
515875ed548SDimitry Andric else
516875ed548SDimitry Andric InsertSub = true;
517875ed548SDimitry Andric } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
5183ca95b02SDimitry Andric !definesCPSR(*MBBI)) {
519875ed548SDimitry Andric // SUBS/ADDS using this register, with a dead def of the CPSR.
520875ed548SDimitry Andric // Merge it with the update; if the merged offset is too large,
521875ed548SDimitry Andric // insert a new sub instead.
522875ed548SDimitry Andric MachineOperand &MO =
523875ed548SDimitry Andric MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
524875ed548SDimitry Andric Offset = (Opc == ARM::tSUBi8) ?
525875ed548SDimitry Andric MO.getImm() + WordOffset * 4 :
526875ed548SDimitry Andric MO.getImm() - WordOffset * 4 ;
527875ed548SDimitry Andric if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
528875ed548SDimitry Andric // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
529875ed548SDimitry Andric // Offset == 0.
530875ed548SDimitry Andric MO.setImm(Offset);
531875ed548SDimitry Andric // The base register has now been reset, so exit early.
532875ed548SDimitry Andric return;
533875ed548SDimitry Andric } else {
534875ed548SDimitry Andric InsertSub = true;
535875ed548SDimitry Andric }
536875ed548SDimitry Andric } else {
537875ed548SDimitry Andric // Can't update the instruction.
538875ed548SDimitry Andric InsertSub = true;
539875ed548SDimitry Andric }
5403ca95b02SDimitry Andric } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
541875ed548SDimitry Andric // Since SUBS sets the condition flags, we can't place the base reset
542875ed548SDimitry Andric // after an instruction that has a live CPSR def.
543875ed548SDimitry Andric // The base register might also contain an argument for a function call.
544875ed548SDimitry Andric InsertSub = true;
545875ed548SDimitry Andric }
546875ed548SDimitry Andric
547875ed548SDimitry Andric if (InsertSub) {
548875ed548SDimitry Andric // An instruction above couldn't be updated, so insert a sub.
5497a7e6055SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
5507a7e6055SDimitry Andric .add(t1CondCodeOp(true))
5517a7e6055SDimitry Andric .addReg(Base)
5527a7e6055SDimitry Andric .addImm(WordOffset * 4)
5537a7e6055SDimitry Andric .addImm(Pred)
5547a7e6055SDimitry Andric .addReg(PredReg);
555875ed548SDimitry Andric return;
556875ed548SDimitry Andric }
557875ed548SDimitry Andric
558875ed548SDimitry Andric if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
559875ed548SDimitry Andric // Register got killed. Stop updating.
560875ed548SDimitry Andric return;
561875ed548SDimitry Andric }
562875ed548SDimitry Andric
563875ed548SDimitry Andric // End of block was reached.
564875ed548SDimitry Andric if (MBB.succ_size() > 0) {
565875ed548SDimitry Andric // FIXME: Because of a bug, live registers are sometimes missing from
566875ed548SDimitry Andric // the successor blocks' live-in sets. This means we can't trust that
567875ed548SDimitry Andric // information and *always* have to reset at the end of a block.
568875ed548SDimitry Andric // See PR21029.
569875ed548SDimitry Andric if (MBBI != MBB.end()) --MBBI;
5707a7e6055SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
5717a7e6055SDimitry Andric .add(t1CondCodeOp(true))
5727a7e6055SDimitry Andric .addReg(Base)
5737a7e6055SDimitry Andric .addImm(WordOffset * 4)
5747a7e6055SDimitry Andric .addImm(Pred)
5757a7e6055SDimitry Andric .addReg(PredReg);
576875ed548SDimitry Andric }
577875ed548SDimitry Andric }
578875ed548SDimitry Andric
579875ed548SDimitry Andric /// Return the first register of class \p RegClass that is not in \p Regs.
findFreeReg(const TargetRegisterClass & RegClass)580875ed548SDimitry Andric unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
581875ed548SDimitry Andric if (!RegClassInfoValid) {
582875ed548SDimitry Andric RegClassInfo.runOnMachineFunction(*MF);
583875ed548SDimitry Andric RegClassInfoValid = true;
584875ed548SDimitry Andric }
585875ed548SDimitry Andric
586875ed548SDimitry Andric for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
587875ed548SDimitry Andric if (!LiveRegs.contains(Reg))
588875ed548SDimitry Andric return Reg;
589875ed548SDimitry Andric return 0;
590875ed548SDimitry Andric }
591875ed548SDimitry Andric
592875ed548SDimitry Andric /// Compute live registers just before instruction \p Before (in normal schedule
593875ed548SDimitry Andric /// direction). Computes backwards so multiple queries in the same block must
594875ed548SDimitry Andric /// come in reverse order.
moveLiveRegsBefore(const MachineBasicBlock & MBB,MachineBasicBlock::const_iterator Before)595875ed548SDimitry Andric void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
596875ed548SDimitry Andric MachineBasicBlock::const_iterator Before) {
597875ed548SDimitry Andric // Initialize if we never queried in this block.
598875ed548SDimitry Andric if (!LiveRegsValid) {
599d88c1a5aSDimitry Andric LiveRegs.init(*TRI);
6003ca95b02SDimitry Andric LiveRegs.addLiveOuts(MBB);
601875ed548SDimitry Andric LiveRegPos = MBB.end();
602875ed548SDimitry Andric LiveRegsValid = true;
603875ed548SDimitry Andric }
604875ed548SDimitry Andric // Move backward just before the "Before" position.
605875ed548SDimitry Andric while (LiveRegPos != Before) {
606875ed548SDimitry Andric --LiveRegPos;
607875ed548SDimitry Andric LiveRegs.stepBackward(*LiveRegPos);
608875ed548SDimitry Andric }
609875ed548SDimitry Andric }
610875ed548SDimitry Andric
ContainsReg(const ArrayRef<std::pair<unsigned,bool>> & Regs,unsigned Reg)611875ed548SDimitry Andric static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
612875ed548SDimitry Andric unsigned Reg) {
613875ed548SDimitry Andric for (const std::pair<unsigned, bool> &R : Regs)
614875ed548SDimitry Andric if (R.first == Reg)
615875ed548SDimitry Andric return true;
616875ed548SDimitry Andric return false;
617875ed548SDimitry Andric }
618875ed548SDimitry Andric
619875ed548SDimitry Andric /// Create and insert a LDM or STM with Base as base register and registers in
620875ed548SDimitry Andric /// Regs as the register operands that would be loaded / stored. It returns
621875ed548SDimitry Andric /// true if the transformation is done.
CreateLoadStoreMulti(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,int Offset,unsigned Base,bool BaseKill,unsigned Opcode,ARMCC::CondCodes Pred,unsigned PredReg,const DebugLoc & DL,ArrayRef<std::pair<unsigned,bool>> Regs)6223ca95b02SDimitry Andric MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
6233ca95b02SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
6243ca95b02SDimitry Andric int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
6253ca95b02SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
6263ca95b02SDimitry Andric ArrayRef<std::pair<unsigned, bool>> Regs) {
627875ed548SDimitry Andric unsigned NumRegs = Regs.size();
628875ed548SDimitry Andric assert(NumRegs > 1);
629875ed548SDimitry Andric
630875ed548SDimitry Andric // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
631875ed548SDimitry Andric // Compute liveness information for that register to make the decision.
632875ed548SDimitry Andric bool SafeToClobberCPSR = !isThumb1 ||
633875ed548SDimitry Andric (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
634875ed548SDimitry Andric MachineBasicBlock::LQR_Dead);
635875ed548SDimitry Andric
636875ed548SDimitry Andric bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
637875ed548SDimitry Andric
638875ed548SDimitry Andric // Exception: If the base register is in the input reglist, Thumb1 LDM is
639875ed548SDimitry Andric // non-writeback.
640875ed548SDimitry Andric // It's also not possible to merge an STR of the base register in Thumb1.
6417a7e6055SDimitry Andric if (isThumb1 && ContainsReg(Regs, Base)) {
642875ed548SDimitry Andric assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
6437a7e6055SDimitry Andric if (Opcode == ARM::tLDRi)
644875ed548SDimitry Andric Writeback = false;
6457a7e6055SDimitry Andric else if (Opcode == ARM::tSTRi)
646875ed548SDimitry Andric return nullptr;
647875ed548SDimitry Andric }
648875ed548SDimitry Andric
649875ed548SDimitry Andric ARM_AM::AMSubMode Mode = ARM_AM::ia;
650875ed548SDimitry Andric // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
651875ed548SDimitry Andric bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
652875ed548SDimitry Andric bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
653875ed548SDimitry Andric
654875ed548SDimitry Andric if (Offset == 4 && haveIBAndDA) {
655875ed548SDimitry Andric Mode = ARM_AM::ib;
656875ed548SDimitry Andric } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
657875ed548SDimitry Andric Mode = ARM_AM::da;
658875ed548SDimitry Andric } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
659875ed548SDimitry Andric // VLDM/VSTM do not support DB mode without also updating the base reg.
660875ed548SDimitry Andric Mode = ARM_AM::db;
661875ed548SDimitry Andric } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
662875ed548SDimitry Andric // Check if this is a supported opcode before inserting instructions to
663875ed548SDimitry Andric // calculate a new base register.
664875ed548SDimitry Andric if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
665875ed548SDimitry Andric
666875ed548SDimitry Andric // If starting offset isn't zero, insert a MI to materialize a new base.
667875ed548SDimitry Andric // But only do so if it is cost effective, i.e. merging more than two
668875ed548SDimitry Andric // loads / stores.
669875ed548SDimitry Andric if (NumRegs <= 2)
670875ed548SDimitry Andric return nullptr;
671875ed548SDimitry Andric
672875ed548SDimitry Andric // On Thumb1, it's not worth materializing a new base register without
673875ed548SDimitry Andric // clobbering the CPSR (i.e. not using ADDS/SUBS).
674875ed548SDimitry Andric if (!SafeToClobberCPSR)
675875ed548SDimitry Andric return nullptr;
676875ed548SDimitry Andric
677875ed548SDimitry Andric unsigned NewBase;
678875ed548SDimitry Andric if (isi32Load(Opcode)) {
6797d523365SDimitry Andric // If it is a load, then just use one of the destination registers
6807d523365SDimitry Andric // as the new base. Will no longer be writeback in Thumb1.
681875ed548SDimitry Andric NewBase = Regs[NumRegs-1].first;
6827d523365SDimitry Andric Writeback = false;
683875ed548SDimitry Andric } else {
684875ed548SDimitry Andric // Find a free register that we can use as scratch register.
685875ed548SDimitry Andric moveLiveRegsBefore(MBB, InsertBefore);
686875ed548SDimitry Andric // The merged instruction does not exist yet but will use several Regs if
687875ed548SDimitry Andric // it is a Store.
688875ed548SDimitry Andric if (!isLoadSingle(Opcode))
689875ed548SDimitry Andric for (const std::pair<unsigned, bool> &R : Regs)
690875ed548SDimitry Andric LiveRegs.addReg(R.first);
691875ed548SDimitry Andric
692875ed548SDimitry Andric NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
693875ed548SDimitry Andric if (NewBase == 0)
694875ed548SDimitry Andric return nullptr;
695875ed548SDimitry Andric }
696875ed548SDimitry Andric
697875ed548SDimitry Andric int BaseOpc =
698875ed548SDimitry Andric isThumb2 ? ARM::t2ADDri :
699875ed548SDimitry Andric (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
700875ed548SDimitry Andric (isThumb1 && Offset < 8) ? ARM::tADDi3 :
701875ed548SDimitry Andric isThumb1 ? ARM::tADDi8 : ARM::ADDri;
702875ed548SDimitry Andric
703875ed548SDimitry Andric if (Offset < 0) {
704875ed548SDimitry Andric Offset = - Offset;
705875ed548SDimitry Andric BaseOpc =
706875ed548SDimitry Andric isThumb2 ? ARM::t2SUBri :
707875ed548SDimitry Andric (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
708875ed548SDimitry Andric isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
709875ed548SDimitry Andric }
710875ed548SDimitry Andric
711875ed548SDimitry Andric if (!TL->isLegalAddImmediate(Offset))
712875ed548SDimitry Andric // FIXME: Try add with register operand?
713875ed548SDimitry Andric return nullptr; // Probably not worth it then.
714875ed548SDimitry Andric
715875ed548SDimitry Andric // We can only append a kill flag to the add/sub input if the value is not
716875ed548SDimitry Andric // used in the register list of the stm as well.
717875ed548SDimitry Andric bool KillOldBase = BaseKill &&
718875ed548SDimitry Andric (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
719875ed548SDimitry Andric
720875ed548SDimitry Andric if (isThumb1) {
721875ed548SDimitry Andric // Thumb1: depending on immediate size, use either
722875ed548SDimitry Andric // ADDS NewBase, Base, #imm3
723875ed548SDimitry Andric // or
724875ed548SDimitry Andric // MOV NewBase, Base
725875ed548SDimitry Andric // ADDS NewBase, #imm8.
726875ed548SDimitry Andric if (Base != NewBase &&
727875ed548SDimitry Andric (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
728875ed548SDimitry Andric // Need to insert a MOV to the new base first.
729875ed548SDimitry Andric if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
730875ed548SDimitry Andric !STI->hasV6Ops()) {
731875ed548SDimitry Andric // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
732875ed548SDimitry Andric if (Pred != ARMCC::AL)
733875ed548SDimitry Andric return nullptr;
734875ed548SDimitry Andric BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
735875ed548SDimitry Andric .addReg(Base, getKillRegState(KillOldBase));
736875ed548SDimitry Andric } else
737875ed548SDimitry Andric BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
738875ed548SDimitry Andric .addReg(Base, getKillRegState(KillOldBase))
7397a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
740875ed548SDimitry Andric
741875ed548SDimitry Andric // The following ADDS/SUBS becomes an update.
742875ed548SDimitry Andric Base = NewBase;
743875ed548SDimitry Andric KillOldBase = true;
744875ed548SDimitry Andric }
745875ed548SDimitry Andric if (BaseOpc == ARM::tADDrSPi) {
746875ed548SDimitry Andric assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
747875ed548SDimitry Andric BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
7487a7e6055SDimitry Andric .addReg(Base, getKillRegState(KillOldBase))
7497a7e6055SDimitry Andric .addImm(Offset / 4)
7507a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
751875ed548SDimitry Andric } else
7527a7e6055SDimitry Andric BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
7537a7e6055SDimitry Andric .add(t1CondCodeOp(true))
7547a7e6055SDimitry Andric .addReg(Base, getKillRegState(KillOldBase))
7557a7e6055SDimitry Andric .addImm(Offset)
7567a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
757875ed548SDimitry Andric } else {
758875ed548SDimitry Andric BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
7597a7e6055SDimitry Andric .addReg(Base, getKillRegState(KillOldBase))
7607a7e6055SDimitry Andric .addImm(Offset)
7617a7e6055SDimitry Andric .add(predOps(Pred, PredReg))
7627a7e6055SDimitry Andric .add(condCodeOp());
763875ed548SDimitry Andric }
764875ed548SDimitry Andric Base = NewBase;
765875ed548SDimitry Andric BaseKill = true; // New base is always killed straight away.
766875ed548SDimitry Andric }
767875ed548SDimitry Andric
768875ed548SDimitry Andric bool isDef = isLoadSingle(Opcode);
769875ed548SDimitry Andric
770875ed548SDimitry Andric // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
771875ed548SDimitry Andric // base register writeback.
772875ed548SDimitry Andric Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
773875ed548SDimitry Andric if (!Opcode)
774875ed548SDimitry Andric return nullptr;
775875ed548SDimitry Andric
776875ed548SDimitry Andric // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
777875ed548SDimitry Andric // - There is no writeback (LDM of base register),
778875ed548SDimitry Andric // - the base register is killed by the merged instruction,
779875ed548SDimitry Andric // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
780875ed548SDimitry Andric // to reset the base register.
781875ed548SDimitry Andric // Otherwise, don't merge.
782875ed548SDimitry Andric // It's safe to return here since the code to materialize a new base register
783875ed548SDimitry Andric // above is also conditional on SafeToClobberCPSR.
784875ed548SDimitry Andric if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
785875ed548SDimitry Andric return nullptr;
786875ed548SDimitry Andric
787875ed548SDimitry Andric MachineInstrBuilder MIB;
788875ed548SDimitry Andric
789875ed548SDimitry Andric if (Writeback) {
7907d523365SDimitry Andric assert(isThumb1 && "expected Writeback only inThumb1");
7917d523365SDimitry Andric if (Opcode == ARM::tLDMIA) {
7927d523365SDimitry Andric assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
793875ed548SDimitry Andric // Update tLDMIA with writeback if necessary.
794875ed548SDimitry Andric Opcode = ARM::tLDMIA_UPD;
7957d523365SDimitry Andric }
796875ed548SDimitry Andric
797875ed548SDimitry Andric MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
798875ed548SDimitry Andric
799875ed548SDimitry Andric // Thumb1: we might need to set base writeback when building the MI.
800875ed548SDimitry Andric MIB.addReg(Base, getDefRegState(true))
801875ed548SDimitry Andric .addReg(Base, getKillRegState(BaseKill));
802875ed548SDimitry Andric
803875ed548SDimitry Andric // The base isn't dead after a merged instruction with writeback.
804875ed548SDimitry Andric // Insert a sub instruction after the newly formed instruction to reset.
805875ed548SDimitry Andric if (!BaseKill)
806875ed548SDimitry Andric UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
807875ed548SDimitry Andric } else {
808875ed548SDimitry Andric // No writeback, simply build the MachineInstr.
809875ed548SDimitry Andric MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
810875ed548SDimitry Andric MIB.addReg(Base, getKillRegState(BaseKill));
811875ed548SDimitry Andric }
812875ed548SDimitry Andric
813875ed548SDimitry Andric MIB.addImm(Pred).addReg(PredReg);
814875ed548SDimitry Andric
815875ed548SDimitry Andric for (const std::pair<unsigned, bool> &R : Regs)
816875ed548SDimitry Andric MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
817875ed548SDimitry Andric
818875ed548SDimitry Andric return MIB.getInstr();
819875ed548SDimitry Andric }
820875ed548SDimitry Andric
CreateLoadStoreDouble(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,int Offset,unsigned Base,bool BaseKill,unsigned Opcode,ARMCC::CondCodes Pred,unsigned PredReg,const DebugLoc & DL,ArrayRef<std::pair<unsigned,bool>> Regs) const8213ca95b02SDimitry Andric MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
8223ca95b02SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
8233ca95b02SDimitry Andric int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
8243ca95b02SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
8253ca95b02SDimitry Andric ArrayRef<std::pair<unsigned, bool>> Regs) const {
826875ed548SDimitry Andric bool IsLoad = isi32Load(Opcode);
827875ed548SDimitry Andric assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
828875ed548SDimitry Andric unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
829875ed548SDimitry Andric
830875ed548SDimitry Andric assert(Regs.size() == 2);
831875ed548SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
832875ed548SDimitry Andric TII->get(LoadStoreOpcode));
833875ed548SDimitry Andric if (IsLoad) {
834875ed548SDimitry Andric MIB.addReg(Regs[0].first, RegState::Define)
835875ed548SDimitry Andric .addReg(Regs[1].first, RegState::Define);
836875ed548SDimitry Andric } else {
837875ed548SDimitry Andric MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
838875ed548SDimitry Andric .addReg(Regs[1].first, getKillRegState(Regs[1].second));
839875ed548SDimitry Andric }
840875ed548SDimitry Andric MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
841875ed548SDimitry Andric return MIB.getInstr();
842875ed548SDimitry Andric }
843875ed548SDimitry Andric
844875ed548SDimitry Andric /// Call MergeOps and update MemOps and merges accordingly on success.
MergeOpsUpdate(const MergeCandidate & Cand)845875ed548SDimitry Andric MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
846875ed548SDimitry Andric const MachineInstr *First = Cand.Instrs.front();
847875ed548SDimitry Andric unsigned Opcode = First->getOpcode();
848875ed548SDimitry Andric bool IsLoad = isLoadSingle(Opcode);
849875ed548SDimitry Andric SmallVector<std::pair<unsigned, bool>, 8> Regs;
850875ed548SDimitry Andric SmallVector<unsigned, 4> ImpDefs;
851875ed548SDimitry Andric DenseSet<unsigned> KilledRegs;
8527d523365SDimitry Andric DenseSet<unsigned> UsedRegs;
853875ed548SDimitry Andric // Determine list of registers and list of implicit super-register defs.
854875ed548SDimitry Andric for (const MachineInstr *MI : Cand.Instrs) {
855875ed548SDimitry Andric const MachineOperand &MO = getLoadStoreRegOp(*MI);
856875ed548SDimitry Andric unsigned Reg = MO.getReg();
857875ed548SDimitry Andric bool IsKill = MO.isKill();
858875ed548SDimitry Andric if (IsKill)
859875ed548SDimitry Andric KilledRegs.insert(Reg);
860875ed548SDimitry Andric Regs.push_back(std::make_pair(Reg, IsKill));
8617d523365SDimitry Andric UsedRegs.insert(Reg);
862875ed548SDimitry Andric
863875ed548SDimitry Andric if (IsLoad) {
864875ed548SDimitry Andric // Collect any implicit defs of super-registers, after merging we can't
865875ed548SDimitry Andric // be sure anymore that we properly preserved these live ranges and must
866875ed548SDimitry Andric // removed these implicit operands.
867875ed548SDimitry Andric for (const MachineOperand &MO : MI->implicit_operands()) {
868875ed548SDimitry Andric if (!MO.isReg() || !MO.isDef() || MO.isDead())
869875ed548SDimitry Andric continue;
870875ed548SDimitry Andric assert(MO.isImplicit());
871875ed548SDimitry Andric unsigned DefReg = MO.getReg();
872875ed548SDimitry Andric
873d88c1a5aSDimitry Andric if (is_contained(ImpDefs, DefReg))
874875ed548SDimitry Andric continue;
875875ed548SDimitry Andric // We can ignore cases where the super-reg is read and written.
876875ed548SDimitry Andric if (MI->readsRegister(DefReg))
877875ed548SDimitry Andric continue;
878875ed548SDimitry Andric ImpDefs.push_back(DefReg);
879875ed548SDimitry Andric }
880875ed548SDimitry Andric }
881875ed548SDimitry Andric }
882875ed548SDimitry Andric
883875ed548SDimitry Andric // Attempt the merge.
8842cab237bSDimitry Andric using iterator = MachineBasicBlock::iterator;
8852cab237bSDimitry Andric
886875ed548SDimitry Andric MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
887875ed548SDimitry Andric iterator InsertBefore = std::next(iterator(LatestMI));
888875ed548SDimitry Andric MachineBasicBlock &MBB = *LatestMI->getParent();
8893ca95b02SDimitry Andric unsigned Offset = getMemoryOpOffset(*First);
890875ed548SDimitry Andric unsigned Base = getLoadStoreBaseOp(*First).getReg();
891875ed548SDimitry Andric bool BaseKill = LatestMI->killsRegister(Base);
892875ed548SDimitry Andric unsigned PredReg = 0;
8933ca95b02SDimitry Andric ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
894875ed548SDimitry Andric DebugLoc DL = First->getDebugLoc();
895875ed548SDimitry Andric MachineInstr *Merged = nullptr;
896875ed548SDimitry Andric if (Cand.CanMergeToLSDouble)
897875ed548SDimitry Andric Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
898875ed548SDimitry Andric Opcode, Pred, PredReg, DL, Regs);
899875ed548SDimitry Andric if (!Merged && Cand.CanMergeToLSMulti)
900875ed548SDimitry Andric Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
901875ed548SDimitry Andric Opcode, Pred, PredReg, DL, Regs);
902875ed548SDimitry Andric if (!Merged)
903875ed548SDimitry Andric return nullptr;
904875ed548SDimitry Andric
905875ed548SDimitry Andric // Determine earliest instruction that will get removed. We then keep an
906875ed548SDimitry Andric // iterator just above it so the following erases don't invalidated it.
907875ed548SDimitry Andric iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
908875ed548SDimitry Andric bool EarliestAtBegin = false;
909875ed548SDimitry Andric if (EarliestI == MBB.begin()) {
910875ed548SDimitry Andric EarliestAtBegin = true;
911875ed548SDimitry Andric } else {
912875ed548SDimitry Andric EarliestI = std::prev(EarliestI);
913875ed548SDimitry Andric }
914875ed548SDimitry Andric
915875ed548SDimitry Andric // Remove instructions which have been merged.
916875ed548SDimitry Andric for (MachineInstr *MI : Cand.Instrs)
917875ed548SDimitry Andric MBB.erase(MI);
918875ed548SDimitry Andric
919875ed548SDimitry Andric // Determine range between the earliest removed instruction and the new one.
920875ed548SDimitry Andric if (EarliestAtBegin)
921875ed548SDimitry Andric EarliestI = MBB.begin();
922875ed548SDimitry Andric else
923875ed548SDimitry Andric EarliestI = std::next(EarliestI);
924875ed548SDimitry Andric auto FixupRange = make_range(EarliestI, iterator(Merged));
925875ed548SDimitry Andric
926875ed548SDimitry Andric if (isLoadSingle(Opcode)) {
927875ed548SDimitry Andric // If the previous loads defined a super-reg, then we have to mark earlier
928875ed548SDimitry Andric // operands undef; Replicate the super-reg def on the merged instruction.
929875ed548SDimitry Andric for (MachineInstr &MI : FixupRange) {
930875ed548SDimitry Andric for (unsigned &ImpDefReg : ImpDefs) {
931875ed548SDimitry Andric for (MachineOperand &MO : MI.implicit_operands()) {
932875ed548SDimitry Andric if (!MO.isReg() || MO.getReg() != ImpDefReg)
933875ed548SDimitry Andric continue;
934875ed548SDimitry Andric if (MO.readsReg())
935875ed548SDimitry Andric MO.setIsUndef();
936875ed548SDimitry Andric else if (MO.isDef())
937875ed548SDimitry Andric ImpDefReg = 0;
938875ed548SDimitry Andric }
939875ed548SDimitry Andric }
940875ed548SDimitry Andric }
941875ed548SDimitry Andric
942875ed548SDimitry Andric MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
943875ed548SDimitry Andric for (unsigned ImpDef : ImpDefs)
944875ed548SDimitry Andric MIB.addReg(ImpDef, RegState::ImplicitDefine);
945875ed548SDimitry Andric } else {
946875ed548SDimitry Andric // Remove kill flags: We are possibly storing the values later now.
947875ed548SDimitry Andric assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
948875ed548SDimitry Andric for (MachineInstr &MI : FixupRange) {
949875ed548SDimitry Andric for (MachineOperand &MO : MI.uses()) {
950875ed548SDimitry Andric if (!MO.isReg() || !MO.isKill())
951875ed548SDimitry Andric continue;
9527d523365SDimitry Andric if (UsedRegs.count(MO.getReg()))
953875ed548SDimitry Andric MO.setIsKill(false);
954875ed548SDimitry Andric }
955875ed548SDimitry Andric }
956875ed548SDimitry Andric assert(ImpDefs.empty());
957875ed548SDimitry Andric }
958875ed548SDimitry Andric
959875ed548SDimitry Andric return Merged;
960875ed548SDimitry Andric }
961875ed548SDimitry Andric
/// Return true if \p Offset can be used as the immediate offset of a
/// t2LDRDi8/t2STRDi8 instruction.
static bool isValidLSDoubleOffset(int Offset) {
  // Compute the magnitude in unsigned arithmetic: abs(INT_MIN) overflows,
  // which is undefined behavior for a signed int.
  unsigned Value = Offset < 0 ? 0u - static_cast<unsigned>(Offset)
                              : static_cast<unsigned>(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
968875ed548SDimitry Andric
9693ca95b02SDimitry Andric /// Return true for loads/stores that can be combined to a double/multi
9703ca95b02SDimitry Andric /// operation without increasing the requirements for alignment.
mayCombineMisaligned(const TargetSubtargetInfo & STI,const MachineInstr & MI)9713ca95b02SDimitry Andric static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
9723ca95b02SDimitry Andric const MachineInstr &MI) {
9733ca95b02SDimitry Andric // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
9743ca95b02SDimitry Andric // difference.
9753ca95b02SDimitry Andric unsigned Opcode = MI.getOpcode();
9763ca95b02SDimitry Andric if (!isi32Load(Opcode) && !isi32Store(Opcode))
9773ca95b02SDimitry Andric return true;
9783ca95b02SDimitry Andric
9793ca95b02SDimitry Andric // Stack pointer alignment is out of the programmers control so we can trust
9803ca95b02SDimitry Andric // SP-relative loads/stores.
9813ca95b02SDimitry Andric if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
9823ca95b02SDimitry Andric STI.getFrameLowering()->getTransientStackAlignment() >= 4)
9833ca95b02SDimitry Andric return true;
9843ca95b02SDimitry Andric return false;
9853ca95b02SDimitry Andric }
9863ca95b02SDimitry Andric
987875ed548SDimitry Andric /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
FormCandidates(const MemOpQueue & MemOps)988875ed548SDimitry Andric void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
989875ed548SDimitry Andric const MachineInstr *FirstMI = MemOps[0].MI;
990875ed548SDimitry Andric unsigned Opcode = FirstMI->getOpcode();
991875ed548SDimitry Andric bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
992875ed548SDimitry Andric unsigned Size = getLSMultipleTransferSize(FirstMI);
993875ed548SDimitry Andric
994875ed548SDimitry Andric unsigned SIndex = 0;
995875ed548SDimitry Andric unsigned EIndex = MemOps.size();
996875ed548SDimitry Andric do {
997875ed548SDimitry Andric // Look at the first instruction.
998875ed548SDimitry Andric const MachineInstr *MI = MemOps[SIndex].MI;
999875ed548SDimitry Andric int Offset = MemOps[SIndex].Offset;
1000875ed548SDimitry Andric const MachineOperand &PMO = getLoadStoreRegOp(*MI);
1001875ed548SDimitry Andric unsigned PReg = PMO.getReg();
10022cab237bSDimitry Andric unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
10032cab237bSDimitry Andric : TRI->getEncodingValue(PReg);
1004875ed548SDimitry Andric unsigned Latest = SIndex;
1005875ed548SDimitry Andric unsigned Earliest = SIndex;
1006875ed548SDimitry Andric unsigned Count = 1;
1007875ed548SDimitry Andric bool CanMergeToLSDouble =
1008875ed548SDimitry Andric STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
1009875ed548SDimitry Andric // ARM errata 602117: LDRD with base in list may result in incorrect base
1010875ed548SDimitry Andric // register when interrupted or faulted.
1011875ed548SDimitry Andric if (STI->isCortexM3() && isi32Load(Opcode) &&
1012875ed548SDimitry Andric PReg == getLoadStoreBaseOp(*MI).getReg())
1013875ed548SDimitry Andric CanMergeToLSDouble = false;
1014875ed548SDimitry Andric
1015875ed548SDimitry Andric bool CanMergeToLSMulti = true;
1016875ed548SDimitry Andric // On swift vldm/vstm starting with an odd register number as that needs
1017875ed548SDimitry Andric // more uops than single vldrs.
10183ca95b02SDimitry Andric if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
1019875ed548SDimitry Andric CanMergeToLSMulti = false;
1020875ed548SDimitry Andric
1021b6c25e0eSDimitry Andric // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
1022b6c25e0eSDimitry Andric // deprecated; LDM to PC is fine but cannot happen here.
1023b6c25e0eSDimitry Andric if (PReg == ARM::SP || PReg == ARM::PC)
1024b6c25e0eSDimitry Andric CanMergeToLSMulti = CanMergeToLSDouble = false;
1025b6c25e0eSDimitry Andric
10263ca95b02SDimitry Andric // Should we be conservative?
10273ca95b02SDimitry Andric if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
10283ca95b02SDimitry Andric CanMergeToLSMulti = CanMergeToLSDouble = false;
10293ca95b02SDimitry Andric
1030*b5893f02SDimitry Andric // vldm / vstm limit are 32 for S variants, 16 for D variants.
1031*b5893f02SDimitry Andric unsigned Limit;
1032*b5893f02SDimitry Andric switch (Opcode) {
1033*b5893f02SDimitry Andric default:
1034*b5893f02SDimitry Andric Limit = UINT_MAX;
1035*b5893f02SDimitry Andric break;
1036*b5893f02SDimitry Andric case ARM::VLDRD:
1037*b5893f02SDimitry Andric case ARM::VSTRD:
1038*b5893f02SDimitry Andric Limit = 16;
1039*b5893f02SDimitry Andric break;
1040*b5893f02SDimitry Andric }
1041*b5893f02SDimitry Andric
1042875ed548SDimitry Andric // Merge following instructions where possible.
1043875ed548SDimitry Andric for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
1044875ed548SDimitry Andric int NewOffset = MemOps[I].Offset;
1045875ed548SDimitry Andric if (NewOffset != Offset + (int)Size)
1046875ed548SDimitry Andric break;
1047875ed548SDimitry Andric const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
1048875ed548SDimitry Andric unsigned Reg = MO.getReg();
1049b6c25e0eSDimitry Andric if (Reg == ARM::SP || Reg == ARM::PC)
1050b6c25e0eSDimitry Andric break;
1051*b5893f02SDimitry Andric if (Count == Limit)
1052*b5893f02SDimitry Andric break;
1053875ed548SDimitry Andric
1054875ed548SDimitry Andric // See if the current load/store may be part of a multi load/store.
10552cab237bSDimitry Andric unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
10562cab237bSDimitry Andric : TRI->getEncodingValue(Reg);
1057875ed548SDimitry Andric bool PartOfLSMulti = CanMergeToLSMulti;
1058875ed548SDimitry Andric if (PartOfLSMulti) {
1059875ed548SDimitry Andric // Register numbers must be in ascending order.
1060b6c25e0eSDimitry Andric if (RegNum <= PRegNum)
1061875ed548SDimitry Andric PartOfLSMulti = false;
1062875ed548SDimitry Andric // For VFP / NEON load/store multiples, the registers must be
1063875ed548SDimitry Andric // consecutive and within the limit on the number of registers per
1064875ed548SDimitry Andric // instruction.
1065875ed548SDimitry Andric else if (!isNotVFP && RegNum != PRegNum+1)
1066875ed548SDimitry Andric PartOfLSMulti = false;
1067875ed548SDimitry Andric }
1068875ed548SDimitry Andric // See if the current load/store may be part of a double load/store.
1069875ed548SDimitry Andric bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
1070875ed548SDimitry Andric
1071875ed548SDimitry Andric if (!PartOfLSMulti && !PartOfLSDouble)
1072875ed548SDimitry Andric break;
1073875ed548SDimitry Andric CanMergeToLSMulti &= PartOfLSMulti;
1074875ed548SDimitry Andric CanMergeToLSDouble &= PartOfLSDouble;
1075875ed548SDimitry Andric // Track MemOp with latest and earliest position (Positions are
1076875ed548SDimitry Andric // counted in reverse).
1077875ed548SDimitry Andric unsigned Position = MemOps[I].Position;
1078875ed548SDimitry Andric if (Position < MemOps[Latest].Position)
1079875ed548SDimitry Andric Latest = I;
1080875ed548SDimitry Andric else if (Position > MemOps[Earliest].Position)
1081875ed548SDimitry Andric Earliest = I;
1082875ed548SDimitry Andric // Prepare for next MemOp.
1083875ed548SDimitry Andric Offset += Size;
1084875ed548SDimitry Andric PRegNum = RegNum;
1085875ed548SDimitry Andric }
1086875ed548SDimitry Andric
1087875ed548SDimitry Andric // Form a candidate from the Ops collected so far.
1088875ed548SDimitry Andric MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1089875ed548SDimitry Andric for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1090875ed548SDimitry Andric Candidate->Instrs.push_back(MemOps[C].MI);
1091875ed548SDimitry Andric Candidate->LatestMIIdx = Latest - SIndex;
1092875ed548SDimitry Andric Candidate->EarliestMIIdx = Earliest - SIndex;
1093875ed548SDimitry Andric Candidate->InsertPos = MemOps[Latest].Position;
1094875ed548SDimitry Andric if (Count == 1)
1095875ed548SDimitry Andric CanMergeToLSMulti = CanMergeToLSDouble = false;
1096875ed548SDimitry Andric Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1097875ed548SDimitry Andric Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1098875ed548SDimitry Andric Candidates.push_back(Candidate);
1099875ed548SDimitry Andric // Continue after the chain.
1100875ed548SDimitry Andric SIndex += Count;
1101875ed548SDimitry Andric } while (SIndex < EIndex);
1102875ed548SDimitry Andric }
1103875ed548SDimitry Andric
getUpdatingLSMultipleOpcode(unsigned Opc,ARM_AM::AMSubMode Mode)11042754fe60SDimitry Andric static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
11052754fe60SDimitry Andric ARM_AM::AMSubMode Mode) {
1106f22ef01cSRoman Divacky switch (Opc) {
1107f22ef01cSRoman Divacky default: llvm_unreachable("Unhandled opcode!");
11082754fe60SDimitry Andric case ARM::LDMIA:
11092754fe60SDimitry Andric case ARM::LDMDA:
11102754fe60SDimitry Andric case ARM::LDMDB:
11112754fe60SDimitry Andric case ARM::LDMIB:
11122754fe60SDimitry Andric switch (Mode) {
11132754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11142754fe60SDimitry Andric case ARM_AM::ia: return ARM::LDMIA_UPD;
11152754fe60SDimitry Andric case ARM_AM::ib: return ARM::LDMIB_UPD;
11162754fe60SDimitry Andric case ARM_AM::da: return ARM::LDMDA_UPD;
11172754fe60SDimitry Andric case ARM_AM::db: return ARM::LDMDB_UPD;
1118f22ef01cSRoman Divacky }
11192754fe60SDimitry Andric case ARM::STMIA:
11202754fe60SDimitry Andric case ARM::STMDA:
11212754fe60SDimitry Andric case ARM::STMDB:
11222754fe60SDimitry Andric case ARM::STMIB:
11232754fe60SDimitry Andric switch (Mode) {
11242754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11252754fe60SDimitry Andric case ARM_AM::ia: return ARM::STMIA_UPD;
11262754fe60SDimitry Andric case ARM_AM::ib: return ARM::STMIB_UPD;
11272754fe60SDimitry Andric case ARM_AM::da: return ARM::STMDA_UPD;
11282754fe60SDimitry Andric case ARM_AM::db: return ARM::STMDB_UPD;
11292754fe60SDimitry Andric }
11302754fe60SDimitry Andric case ARM::t2LDMIA:
11312754fe60SDimitry Andric case ARM::t2LDMDB:
11322754fe60SDimitry Andric switch (Mode) {
11332754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11342754fe60SDimitry Andric case ARM_AM::ia: return ARM::t2LDMIA_UPD;
11352754fe60SDimitry Andric case ARM_AM::db: return ARM::t2LDMDB_UPD;
11362754fe60SDimitry Andric }
11372754fe60SDimitry Andric case ARM::t2STMIA:
11382754fe60SDimitry Andric case ARM::t2STMDB:
11392754fe60SDimitry Andric switch (Mode) {
11402754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11412754fe60SDimitry Andric case ARM_AM::ia: return ARM::t2STMIA_UPD;
11422754fe60SDimitry Andric case ARM_AM::db: return ARM::t2STMDB_UPD;
11432754fe60SDimitry Andric }
11442754fe60SDimitry Andric case ARM::VLDMSIA:
11452754fe60SDimitry Andric switch (Mode) {
11462754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11472754fe60SDimitry Andric case ARM_AM::ia: return ARM::VLDMSIA_UPD;
11482754fe60SDimitry Andric case ARM_AM::db: return ARM::VLDMSDB_UPD;
11492754fe60SDimitry Andric }
11502754fe60SDimitry Andric case ARM::VLDMDIA:
11512754fe60SDimitry Andric switch (Mode) {
11522754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11532754fe60SDimitry Andric case ARM_AM::ia: return ARM::VLDMDIA_UPD;
11542754fe60SDimitry Andric case ARM_AM::db: return ARM::VLDMDDB_UPD;
11552754fe60SDimitry Andric }
11562754fe60SDimitry Andric case ARM::VSTMSIA:
11572754fe60SDimitry Andric switch (Mode) {
11582754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11592754fe60SDimitry Andric case ARM_AM::ia: return ARM::VSTMSIA_UPD;
11602754fe60SDimitry Andric case ARM_AM::db: return ARM::VSTMSDB_UPD;
11612754fe60SDimitry Andric }
11622754fe60SDimitry Andric case ARM::VSTMDIA:
11632754fe60SDimitry Andric switch (Mode) {
11642754fe60SDimitry Andric default: llvm_unreachable("Unhandled submode!");
11652754fe60SDimitry Andric case ARM_AM::ia: return ARM::VSTMDIA_UPD;
11662754fe60SDimitry Andric case ARM_AM::db: return ARM::VSTMDDB_UPD;
11672754fe60SDimitry Andric }
11682754fe60SDimitry Andric }
1169f22ef01cSRoman Divacky }
1170f22ef01cSRoman Divacky
11717d523365SDimitry Andric /// Check if the given instruction increments or decrements a register and
11727d523365SDimitry Andric /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
11737d523365SDimitry Andric /// generated by the instruction are possibly read as well.
isIncrementOrDecrement(const MachineInstr & MI,unsigned Reg,ARMCC::CondCodes Pred,unsigned PredReg)11747d523365SDimitry Andric static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
11757d523365SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg) {
11767d523365SDimitry Andric bool CheckCPSRDef;
11777d523365SDimitry Andric int Scale;
11787d523365SDimitry Andric switch (MI.getOpcode()) {
11797d523365SDimitry Andric case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
11807d523365SDimitry Andric case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
11817d523365SDimitry Andric case ARM::t2SUBri:
11827d523365SDimitry Andric case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
11837d523365SDimitry Andric case ARM::t2ADDri:
11847d523365SDimitry Andric case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
11857d523365SDimitry Andric case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
11867d523365SDimitry Andric case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
11877d523365SDimitry Andric default: return 0;
11887d523365SDimitry Andric }
11897d523365SDimitry Andric
11907d523365SDimitry Andric unsigned MIPredReg;
11917d523365SDimitry Andric if (MI.getOperand(0).getReg() != Reg ||
11927d523365SDimitry Andric MI.getOperand(1).getReg() != Reg ||
11933ca95b02SDimitry Andric getInstrPredicate(MI, MIPredReg) != Pred ||
11947d523365SDimitry Andric MIPredReg != PredReg)
11957d523365SDimitry Andric return 0;
11967d523365SDimitry Andric
11973ca95b02SDimitry Andric if (CheckCPSRDef && definesCPSR(MI))
11987d523365SDimitry Andric return 0;
11997d523365SDimitry Andric return MI.getOperand(2).getImm() * Scale;
12007d523365SDimitry Andric }
12017d523365SDimitry Andric
12027d523365SDimitry Andric /// Searches for an increment or decrement of \p Reg before \p MBBI.
12037d523365SDimitry Andric static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI,unsigned Reg,ARMCC::CondCodes Pred,unsigned PredReg,int & Offset)12047d523365SDimitry Andric findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
12057d523365SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
12067d523365SDimitry Andric Offset = 0;
12077d523365SDimitry Andric MachineBasicBlock &MBB = *MBBI->getParent();
12087d523365SDimitry Andric MachineBasicBlock::iterator BeginMBBI = MBB.begin();
12097d523365SDimitry Andric MachineBasicBlock::iterator EndMBBI = MBB.end();
12107d523365SDimitry Andric if (MBBI == BeginMBBI)
12117d523365SDimitry Andric return EndMBBI;
12127d523365SDimitry Andric
12137d523365SDimitry Andric // Skip debug values.
12147d523365SDimitry Andric MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
12154ba319b5SDimitry Andric while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
12167d523365SDimitry Andric --PrevMBBI;
12177d523365SDimitry Andric
12187d523365SDimitry Andric Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
12197d523365SDimitry Andric return Offset == 0 ? EndMBBI : PrevMBBI;
12207d523365SDimitry Andric }
12217d523365SDimitry Andric
12227d523365SDimitry Andric /// Searches for a increment or decrement of \p Reg after \p MBBI.
12237d523365SDimitry Andric static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI,unsigned Reg,ARMCC::CondCodes Pred,unsigned PredReg,int & Offset)12247d523365SDimitry Andric findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
12257d523365SDimitry Andric ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
12267d523365SDimitry Andric Offset = 0;
12277d523365SDimitry Andric MachineBasicBlock &MBB = *MBBI->getParent();
12287d523365SDimitry Andric MachineBasicBlock::iterator EndMBBI = MBB.end();
12297d523365SDimitry Andric MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
12307d523365SDimitry Andric // Skip debug values.
12314ba319b5SDimitry Andric while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
12327d523365SDimitry Andric ++NextMBBI;
12337d523365SDimitry Andric if (NextMBBI == EndMBBI)
12347d523365SDimitry Andric return EndMBBI;
12357d523365SDimitry Andric
12367d523365SDimitry Andric Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
12377d523365SDimitry Andric return Offset == 0 ? EndMBBI : NextMBBI;
12387d523365SDimitry Andric }
12397d523365SDimitry Andric
124097bc6c73SDimitry Andric /// Fold proceeding/trailing inc/dec of base register into the
124197bc6c73SDimitry Andric /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1242f22ef01cSRoman Divacky ///
1243f22ef01cSRoman Divacky /// stmia rn, <ra, rb, rc>
1244f22ef01cSRoman Divacky /// rn := rn + 4 * 3;
1245f22ef01cSRoman Divacky /// =>
1246f22ef01cSRoman Divacky /// stmia rn!, <ra, rb, rc>
1247f22ef01cSRoman Divacky ///
1248f22ef01cSRoman Divacky /// rn := rn - 4 * 3;
1249f22ef01cSRoman Divacky /// ldmia rn, <ra, rb, rc>
1250f22ef01cSRoman Divacky /// =>
1251f22ef01cSRoman Divacky /// ldmdb rn!, <ra, rb, rc>
MergeBaseUpdateLSMultiple(MachineInstr * MI)1252875ed548SDimitry Andric bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
125391bc56edSDimitry Andric // Thumb1 is already using updating loads/stores.
125491bc56edSDimitry Andric if (isThumb1) return false;
125591bc56edSDimitry Andric
1256875ed548SDimitry Andric const MachineOperand &BaseOP = MI->getOperand(0);
1257875ed548SDimitry Andric unsigned Base = BaseOP.getReg();
1258875ed548SDimitry Andric bool BaseKill = BaseOP.isKill();
1259f22ef01cSRoman Divacky unsigned PredReg = 0;
12603ca95b02SDimitry Andric ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1261ff0cc061SDimitry Andric unsigned Opcode = MI->getOpcode();
1262875ed548SDimitry Andric DebugLoc DL = MI->getDebugLoc();
1263f22ef01cSRoman Divacky
1264f22ef01cSRoman Divacky // Can't use an updating ld/st if the base register is also a dest
1265f22ef01cSRoman Divacky // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
12662754fe60SDimitry Andric for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1267f22ef01cSRoman Divacky if (MI->getOperand(i).getReg() == Base)
1268f22ef01cSRoman Divacky return false;
12692754fe60SDimitry Andric
12707d523365SDimitry Andric int Bytes = getLSMultipleTransferSize(MI);
1271b6c25e0eSDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
1272b6c25e0eSDimitry Andric MachineBasicBlock::iterator MBBI(MI);
12737d523365SDimitry Andric int Offset;
12747d523365SDimitry Andric MachineBasicBlock::iterator MergeInstr
12757d523365SDimitry Andric = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
12767d523365SDimitry Andric ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
12777d523365SDimitry Andric if (Mode == ARM_AM::ia && Offset == -Bytes) {
1278f22ef01cSRoman Divacky Mode = ARM_AM::db;
12797d523365SDimitry Andric } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
1280f22ef01cSRoman Divacky Mode = ARM_AM::da;
12817d523365SDimitry Andric } else {
12827d523365SDimitry Andric MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
12837d523365SDimitry Andric if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
12843ca95b02SDimitry Andric ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
12853ca95b02SDimitry Andric
12863ca95b02SDimitry Andric // We couldn't find an inc/dec to merge. But if the base is dead, we
12873ca95b02SDimitry Andric // can still change to a writeback form as that will save us 2 bytes
12883ca95b02SDimitry Andric // of code size. It can create WAW hazards though, so only do it if
12893ca95b02SDimitry Andric // we're minimizing code size.
12902cab237bSDimitry Andric if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
12913ca95b02SDimitry Andric return false;
12923ca95b02SDimitry Andric
12933ca95b02SDimitry Andric bool HighRegsUsed = false;
12943ca95b02SDimitry Andric for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
12953ca95b02SDimitry Andric if (MI->getOperand(i).getReg() >= ARM::R8) {
12963ca95b02SDimitry Andric HighRegsUsed = true;
12973ca95b02SDimitry Andric break;
12983ca95b02SDimitry Andric }
12993ca95b02SDimitry Andric
13003ca95b02SDimitry Andric if (!HighRegsUsed)
13013ca95b02SDimitry Andric MergeInstr = MBB.end();
13023ca95b02SDimitry Andric else
1303b6c25e0eSDimitry Andric return false;
13047d523365SDimitry Andric }
13053ca95b02SDimitry Andric }
13063ca95b02SDimitry Andric if (MergeInstr != MBB.end())
13077d523365SDimitry Andric MBB.erase(MergeInstr);
1308f22ef01cSRoman Divacky
13092754fe60SDimitry Andric unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1310875ed548SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1311f22ef01cSRoman Divacky .addReg(Base, getDefRegState(true)) // WB base register
1312e580952dSDimitry Andric .addReg(Base, getKillRegState(BaseKill))
1313f22ef01cSRoman Divacky .addImm(Pred).addReg(PredReg);
13142754fe60SDimitry Andric
1315f22ef01cSRoman Divacky // Transfer the rest of operands.
13162754fe60SDimitry Andric for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
13177a7e6055SDimitry Andric MIB.add(MI->getOperand(OpNum));
13182754fe60SDimitry Andric
1319f22ef01cSRoman Divacky // Transfer memoperands.
1320*b5893f02SDimitry Andric MIB.setMemRefs(MI->memoperands());
1321f22ef01cSRoman Divacky
1322f22ef01cSRoman Divacky MBB.erase(MBBI);
1323f22ef01cSRoman Divacky return true;
1324f22ef01cSRoman Divacky }
1325f22ef01cSRoman Divacky
getPreIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)13262754fe60SDimitry Andric static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
13272754fe60SDimitry Andric ARM_AM::AddrOpc Mode) {
1328f22ef01cSRoman Divacky switch (Opc) {
13292754fe60SDimitry Andric case ARM::LDRi12:
13306122f3e6SDimitry Andric return ARM::LDR_PRE_IMM;
13312754fe60SDimitry Andric case ARM::STRi12:
13326122f3e6SDimitry Andric return ARM::STR_PRE_IMM;
13332754fe60SDimitry Andric case ARM::VLDRS:
13342754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
13352754fe60SDimitry Andric case ARM::VLDRD:
13362754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
13372754fe60SDimitry Andric case ARM::VSTRS:
13382754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
13392754fe60SDimitry Andric case ARM::VSTRD:
13402754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1341f22ef01cSRoman Divacky case ARM::t2LDRi8:
1342f22ef01cSRoman Divacky case ARM::t2LDRi12:
1343f22ef01cSRoman Divacky return ARM::t2LDR_PRE;
1344f22ef01cSRoman Divacky case ARM::t2STRi8:
1345f22ef01cSRoman Divacky case ARM::t2STRi12:
1346f22ef01cSRoman Divacky return ARM::t2STR_PRE;
1347f22ef01cSRoman Divacky default: llvm_unreachable("Unhandled opcode!");
1348f22ef01cSRoman Divacky }
1349f22ef01cSRoman Divacky }
1350f22ef01cSRoman Divacky
getPostIndexedLoadStoreOpcode(unsigned Opc,ARM_AM::AddrOpc Mode)13512754fe60SDimitry Andric static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
13522754fe60SDimitry Andric ARM_AM::AddrOpc Mode) {
1353f22ef01cSRoman Divacky switch (Opc) {
13542754fe60SDimitry Andric case ARM::LDRi12:
13556122f3e6SDimitry Andric return ARM::LDR_POST_IMM;
13562754fe60SDimitry Andric case ARM::STRi12:
13576122f3e6SDimitry Andric return ARM::STR_POST_IMM;
13582754fe60SDimitry Andric case ARM::VLDRS:
13592754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
13602754fe60SDimitry Andric case ARM::VLDRD:
13612754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
13622754fe60SDimitry Andric case ARM::VSTRS:
13632754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
13642754fe60SDimitry Andric case ARM::VSTRD:
13652754fe60SDimitry Andric return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1366f22ef01cSRoman Divacky case ARM::t2LDRi8:
1367f22ef01cSRoman Divacky case ARM::t2LDRi12:
1368f22ef01cSRoman Divacky return ARM::t2LDR_POST;
1369f22ef01cSRoman Divacky case ARM::t2STRi8:
1370f22ef01cSRoman Divacky case ARM::t2STRi12:
1371f22ef01cSRoman Divacky return ARM::t2STR_POST;
1372f22ef01cSRoman Divacky default: llvm_unreachable("Unhandled opcode!");
1373f22ef01cSRoman Divacky }
1374f22ef01cSRoman Divacky }
1375f22ef01cSRoman Divacky
137697bc6c73SDimitry Andric /// Fold proceeding/trailing inc/dec of base register into the
137797bc6c73SDimitry Andric /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
MergeBaseUpdateLoadStore(MachineInstr * MI)1378875ed548SDimitry Andric bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
137991bc56edSDimitry Andric // Thumb1 doesn't have updating LDR/STR.
138091bc56edSDimitry Andric // FIXME: Use LDM/STM with single register instead.
138191bc56edSDimitry Andric if (isThumb1) return false;
138291bc56edSDimitry Andric
1383875ed548SDimitry Andric unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1384875ed548SDimitry Andric bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1385ff0cc061SDimitry Andric unsigned Opcode = MI->getOpcode();
1386875ed548SDimitry Andric DebugLoc DL = MI->getDebugLoc();
1387f22ef01cSRoman Divacky bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1388f22ef01cSRoman Divacky Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
13892754fe60SDimitry Andric bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
13902754fe60SDimitry Andric if (isi32Load(Opcode) || isi32Store(Opcode))
13912754fe60SDimitry Andric if (MI->getOperand(2).getImm() != 0)
1392f22ef01cSRoman Divacky return false;
1393f22ef01cSRoman Divacky if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1394f22ef01cSRoman Divacky return false;
1395f22ef01cSRoman Divacky
1396f22ef01cSRoman Divacky // Can't do the merge if the destination register is the same as the would-be
1397f22ef01cSRoman Divacky // writeback register.
1398139f7f9bSDimitry Andric if (MI->getOperand(0).getReg() == Base)
1399f22ef01cSRoman Divacky return false;
1400f22ef01cSRoman Divacky
1401f22ef01cSRoman Divacky unsigned PredReg = 0;
14023ca95b02SDimitry Andric ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
14037d523365SDimitry Andric int Bytes = getLSMultipleTransferSize(MI);
1404875ed548SDimitry Andric MachineBasicBlock &MBB = *MI->getParent();
1405875ed548SDimitry Andric MachineBasicBlock::iterator MBBI(MI);
14067d523365SDimitry Andric int Offset;
14077d523365SDimitry Andric MachineBasicBlock::iterator MergeInstr
14087d523365SDimitry Andric = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
14097d523365SDimitry Andric unsigned NewOpc;
14107d523365SDimitry Andric if (!isAM5 && Offset == Bytes) {
14117d523365SDimitry Andric NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
14127d523365SDimitry Andric } else if (Offset == -Bytes) {
14137d523365SDimitry Andric NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
14147d523365SDimitry Andric } else {
14157d523365SDimitry Andric MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
14167d523365SDimitry Andric if (Offset == Bytes) {
14177d523365SDimitry Andric NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
14187d523365SDimitry Andric } else if (!isAM5 && Offset == -Bytes) {
14197d523365SDimitry Andric NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
14207d523365SDimitry Andric } else
1421b6c25e0eSDimitry Andric return false;
14227d523365SDimitry Andric }
14237d523365SDimitry Andric MBB.erase(MergeInstr);
1424b6c25e0eSDimitry Andric
14257d523365SDimitry Andric ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
14267d523365SDimitry Andric
14277d523365SDimitry Andric bool isLd = isLoadSingle(Opcode);
1428f22ef01cSRoman Divacky if (isAM5) {
142991bc56edSDimitry Andric // VLDM[SD]_UPD, VSTM[SD]_UPD
1430e580952dSDimitry Andric // (There are no base-updating versions of VLDR/VSTR instructions, but the
1431e580952dSDimitry Andric // updating load/store-multiple instructions can be used with only one
1432e580952dSDimitry Andric // register.)
1433f22ef01cSRoman Divacky MachineOperand &MO = MI->getOperand(0);
1434875ed548SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1435f22ef01cSRoman Divacky .addReg(Base, getDefRegState(true)) // WB base register
1436f22ef01cSRoman Divacky .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1437f22ef01cSRoman Divacky .addImm(Pred).addReg(PredReg)
1438f22ef01cSRoman Divacky .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1439f22ef01cSRoman Divacky getKillRegState(MO.isKill())));
1440f22ef01cSRoman Divacky } else if (isLd) {
14416122f3e6SDimitry Andric if (isAM2) {
14426122f3e6SDimitry Andric // LDR_PRE, LDR_POST
14436122f3e6SDimitry Andric if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1444875ed548SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1445f22ef01cSRoman Divacky .addReg(Base, RegState::Define)
1446f22ef01cSRoman Divacky .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1447f22ef01cSRoman Divacky } else {
14487d523365SDimitry Andric int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1449875ed548SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
14506122f3e6SDimitry Andric .addReg(Base, RegState::Define)
14517a7e6055SDimitry Andric .addReg(Base)
14527a7e6055SDimitry Andric .addReg(0)
14537a7e6055SDimitry Andric .addImm(Imm)
14547a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
14556122f3e6SDimitry Andric }
14566122f3e6SDimitry Andric } else {
14576122f3e6SDimitry Andric // t2LDR_PRE, t2LDR_POST
1458875ed548SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
14596122f3e6SDimitry Andric .addReg(Base, RegState::Define)
14607a7e6055SDimitry Andric .addReg(Base)
14617a7e6055SDimitry Andric .addImm(Offset)
14627a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
14636122f3e6SDimitry Andric }
14646122f3e6SDimitry Andric } else {
1465f22ef01cSRoman Divacky MachineOperand &MO = MI->getOperand(0);
14666122f3e6SDimitry Andric // FIXME: post-indexed stores use am2offset_imm, which still encodes
14676122f3e6SDimitry Andric // the vestigal zero-reg offset register. When that's fixed, this clause
14686122f3e6SDimitry Andric // can be removed entirely.
14696122f3e6SDimitry Andric if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
14707d523365SDimitry Andric int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1471f22ef01cSRoman Divacky // STR_PRE, STR_POST
1472875ed548SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1473f22ef01cSRoman Divacky .addReg(MO.getReg(), getKillRegState(MO.isKill()))
14747a7e6055SDimitry Andric .addReg(Base)
14757a7e6055SDimitry Andric .addReg(0)
14767a7e6055SDimitry Andric .addImm(Imm)
14777a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
14786122f3e6SDimitry Andric } else {
1479f22ef01cSRoman Divacky // t2STR_PRE, t2STR_POST
1480875ed548SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1481f22ef01cSRoman Divacky .addReg(MO.getReg(), getKillRegState(MO.isKill()))
14827a7e6055SDimitry Andric .addReg(Base)
14837a7e6055SDimitry Andric .addImm(Offset)
14847a7e6055SDimitry Andric .add(predOps(Pred, PredReg));
1485f22ef01cSRoman Divacky }
14866122f3e6SDimitry Andric }
1487f22ef01cSRoman Divacky MBB.erase(MBBI);
1488f22ef01cSRoman Divacky
1489f22ef01cSRoman Divacky return true;
1490f22ef01cSRoman Divacky }
1491f22ef01cSRoman Divacky
MergeBaseUpdateLSDouble(MachineInstr & MI) const14927d523365SDimitry Andric bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
14937d523365SDimitry Andric unsigned Opcode = MI.getOpcode();
14947d523365SDimitry Andric assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
14957d523365SDimitry Andric "Must have t2STRDi8 or t2LDRDi8");
14967d523365SDimitry Andric if (MI.getOperand(3).getImm() != 0)
14977d523365SDimitry Andric return false;
14987d523365SDimitry Andric
14997d523365SDimitry Andric // Behaviour for writeback is undefined if base register is the same as one
15007d523365SDimitry Andric // of the others.
15017d523365SDimitry Andric const MachineOperand &BaseOp = MI.getOperand(2);
15027d523365SDimitry Andric unsigned Base = BaseOp.getReg();
15037d523365SDimitry Andric const MachineOperand &Reg0Op = MI.getOperand(0);
15047d523365SDimitry Andric const MachineOperand &Reg1Op = MI.getOperand(1);
15057d523365SDimitry Andric if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
15067d523365SDimitry Andric return false;
15077d523365SDimitry Andric
15087d523365SDimitry Andric unsigned PredReg;
15093ca95b02SDimitry Andric ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
15107d523365SDimitry Andric MachineBasicBlock::iterator MBBI(MI);
15117d523365SDimitry Andric MachineBasicBlock &MBB = *MI.getParent();
15127d523365SDimitry Andric int Offset;
15137d523365SDimitry Andric MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
15147d523365SDimitry Andric PredReg, Offset);
15157d523365SDimitry Andric unsigned NewOpc;
15167d523365SDimitry Andric if (Offset == 8 || Offset == -8) {
15177d523365SDimitry Andric NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
15187d523365SDimitry Andric } else {
15197d523365SDimitry Andric MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
15207d523365SDimitry Andric if (Offset == 8 || Offset == -8) {
15217d523365SDimitry Andric NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
15227d523365SDimitry Andric } else
15237d523365SDimitry Andric return false;
15247d523365SDimitry Andric }
15257d523365SDimitry Andric MBB.erase(MergeInstr);
15267d523365SDimitry Andric
15277d523365SDimitry Andric DebugLoc DL = MI.getDebugLoc();
15287d523365SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
15297d523365SDimitry Andric if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
15307a7e6055SDimitry Andric MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
15317d523365SDimitry Andric } else {
15327d523365SDimitry Andric assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
15337a7e6055SDimitry Andric MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
15347d523365SDimitry Andric }
15357d523365SDimitry Andric MIB.addReg(BaseOp.getReg(), RegState::Kill)
15367d523365SDimitry Andric .addImm(Offset).addImm(Pred).addReg(PredReg);
15377d523365SDimitry Andric assert(TII->get(Opcode).getNumOperands() == 6 &&
15387d523365SDimitry Andric TII->get(NewOpc).getNumOperands() == 7 &&
15397d523365SDimitry Andric "Unexpected number of operands in Opcode specification.");
15407d523365SDimitry Andric
15417d523365SDimitry Andric // Transfer implicit operands.
15427d523365SDimitry Andric for (const MachineOperand &MO : MI.implicit_operands())
15437a7e6055SDimitry Andric MIB.add(MO);
1544*b5893f02SDimitry Andric MIB.setMemRefs(MI.memoperands());
15457d523365SDimitry Andric
15467d523365SDimitry Andric MBB.erase(MBBI);
15477d523365SDimitry Andric return true;
15487d523365SDimitry Andric }
15497d523365SDimitry Andric
155097bc6c73SDimitry Andric /// Returns true if instruction is a memory operation that this pass is capable
155197bc6c73SDimitry Andric /// of operating on.
isMemoryOp(const MachineInstr & MI)15527d523365SDimitry Andric static bool isMemoryOp(const MachineInstr &MI) {
15537d523365SDimitry Andric unsigned Opcode = MI.getOpcode();
1554f22ef01cSRoman Divacky switch (Opcode) {
1555f22ef01cSRoman Divacky case ARM::VLDRS:
1556f22ef01cSRoman Divacky case ARM::VSTRS:
1557f22ef01cSRoman Divacky case ARM::VLDRD:
1558f22ef01cSRoman Divacky case ARM::VSTRD:
15592754fe60SDimitry Andric case ARM::LDRi12:
15602754fe60SDimitry Andric case ARM::STRi12:
156191bc56edSDimitry Andric case ARM::tLDRi:
156291bc56edSDimitry Andric case ARM::tSTRi:
1563ff0cc061SDimitry Andric case ARM::tLDRspi:
1564ff0cc061SDimitry Andric case ARM::tSTRspi:
1565f22ef01cSRoman Divacky case ARM::t2LDRi8:
1566f22ef01cSRoman Divacky case ARM::t2LDRi12:
1567f22ef01cSRoman Divacky case ARM::t2STRi8:
1568f22ef01cSRoman Divacky case ARM::t2STRi12:
15697d523365SDimitry Andric break;
15707d523365SDimitry Andric default:
1571f22ef01cSRoman Divacky return false;
1572f22ef01cSRoman Divacky }
15737d523365SDimitry Andric if (!MI.getOperand(1).isReg())
15747d523365SDimitry Andric return false;
15757d523365SDimitry Andric
15767d523365SDimitry Andric // When no memory operands are present, conservatively assume unaligned,
15777d523365SDimitry Andric // volatile, unfoldable.
15787d523365SDimitry Andric if (!MI.hasOneMemOperand())
15797d523365SDimitry Andric return false;
15807d523365SDimitry Andric
15817d523365SDimitry Andric const MachineMemOperand &MMO = **MI.memoperands_begin();
15827d523365SDimitry Andric
15837d523365SDimitry Andric // Don't touch volatile memory accesses - we may be changing their order.
15847d523365SDimitry Andric if (MMO.isVolatile())
15857d523365SDimitry Andric return false;
15867d523365SDimitry Andric
15877d523365SDimitry Andric // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
15887d523365SDimitry Andric // not.
15897d523365SDimitry Andric if (MMO.getAlignment() < 4)
15907d523365SDimitry Andric return false;
15917d523365SDimitry Andric
15927d523365SDimitry Andric // str <undef> could probably be eliminated entirely, but for now we just want
15937d523365SDimitry Andric // to avoid making a mess of it.
15947d523365SDimitry Andric // FIXME: Use str <undef> as a wildcard to enable better stm folding.
15957d523365SDimitry Andric if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
15967d523365SDimitry Andric return false;
15977d523365SDimitry Andric
15987d523365SDimitry Andric // Likewise don't mess with references to undefined addresses.
15997d523365SDimitry Andric if (MI.getOperand(1).isUndef())
16007d523365SDimitry Andric return false;
16017d523365SDimitry Andric
16027d523365SDimitry Andric return true;
16037d523365SDimitry Andric }
1604f22ef01cSRoman Divacky
InsertLDR_STR(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI,int Offset,bool isDef,unsigned NewOpc,unsigned Reg,bool RegDeadKill,bool RegUndef,unsigned BaseReg,bool BaseKill,bool BaseUndef,ARMCC::CondCodes Pred,unsigned PredReg,const TargetInstrInfo * TII)1605f22ef01cSRoman Divacky static void InsertLDR_STR(MachineBasicBlock &MBB,
16063ca95b02SDimitry Andric MachineBasicBlock::iterator &MBBI, int Offset,
16072cab237bSDimitry Andric bool isDef, unsigned NewOpc, unsigned Reg,
16082cab237bSDimitry Andric bool RegDeadKill, bool RegUndef, unsigned BaseReg,
16092cab237bSDimitry Andric bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
16102cab237bSDimitry Andric unsigned PredReg, const TargetInstrInfo *TII) {
1611f22ef01cSRoman Divacky if (isDef) {
1612f22ef01cSRoman Divacky MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1613f22ef01cSRoman Divacky TII->get(NewOpc))
1614f22ef01cSRoman Divacky .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1615f22ef01cSRoman Divacky .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1616f22ef01cSRoman Divacky MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1617f22ef01cSRoman Divacky } else {
1618f22ef01cSRoman Divacky MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1619f22ef01cSRoman Divacky TII->get(NewOpc))
1620f22ef01cSRoman Divacky .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1621f22ef01cSRoman Divacky .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1622f22ef01cSRoman Divacky MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1623f22ef01cSRoman Divacky }
1624f22ef01cSRoman Divacky }
1625f22ef01cSRoman Divacky
FixInvalidRegPairOp(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)1626f22ef01cSRoman Divacky bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1627f22ef01cSRoman Divacky MachineBasicBlock::iterator &MBBI) {
1628f22ef01cSRoman Divacky MachineInstr *MI = &*MBBI;
1629f22ef01cSRoman Divacky unsigned Opcode = MI->getOpcode();
16302cab237bSDimitry Andric // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
16312cab237bSDimitry Andric // if we see this opcode.
16323dac3a9bSDimitry Andric if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
16333dac3a9bSDimitry Andric return false;
16343dac3a9bSDimitry Andric
1635dff0c46cSDimitry Andric const MachineOperand &BaseOp = MI->getOperand(2);
1636dff0c46cSDimitry Andric unsigned BaseReg = BaseOp.getReg();
1637f22ef01cSRoman Divacky unsigned EvenReg = MI->getOperand(0).getReg();
1638f22ef01cSRoman Divacky unsigned OddReg = MI->getOperand(1).getReg();
1639f22ef01cSRoman Divacky unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1640f22ef01cSRoman Divacky unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
16413dac3a9bSDimitry Andric
1642dff0c46cSDimitry Andric // ARM errata 602117: LDRD with base in list may result in incorrect base
1643dff0c46cSDimitry Andric // register when interrupted or faulted.
16443dac3a9bSDimitry Andric bool Errata602117 = EvenReg == BaseReg &&
16453dac3a9bSDimitry Andric (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
16463dac3a9bSDimitry Andric // ARM LDRD/STRD needs consecutive registers.
16473dac3a9bSDimitry Andric bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
16483dac3a9bSDimitry Andric (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
16493dac3a9bSDimitry Andric
16503dac3a9bSDimitry Andric if (!Errata602117 && !NonConsecutiveRegs)
1651f22ef01cSRoman Divacky return false;
1652f22ef01cSRoman Divacky
1653f22ef01cSRoman Divacky bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1654f22ef01cSRoman Divacky bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1655f22ef01cSRoman Divacky bool EvenDeadKill = isLd ?
1656f22ef01cSRoman Divacky MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1657f22ef01cSRoman Divacky bool EvenUndef = MI->getOperand(0).isUndef();
1658f22ef01cSRoman Divacky bool OddDeadKill = isLd ?
1659f22ef01cSRoman Divacky MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1660f22ef01cSRoman Divacky bool OddUndef = MI->getOperand(1).isUndef();
1661f22ef01cSRoman Divacky bool BaseKill = BaseOp.isKill();
1662f22ef01cSRoman Divacky bool BaseUndef = BaseOp.isUndef();
16632cab237bSDimitry Andric assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
16642cab237bSDimitry Andric "register offset not handled below");
16653ca95b02SDimitry Andric int OffImm = getMemoryOpOffset(*MI);
1666f22ef01cSRoman Divacky unsigned PredReg = 0;
16673ca95b02SDimitry Andric ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
1668f22ef01cSRoman Divacky
16692754fe60SDimitry Andric if (OddRegNum > EvenRegNum && OffImm == 0) {
1670f22ef01cSRoman Divacky // Ascending register numbers and no offset. It's safe to change it to a
1671f22ef01cSRoman Divacky // ldm or stm.
1672f22ef01cSRoman Divacky unsigned NewOpc = (isLd)
16732754fe60SDimitry Andric ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
16742754fe60SDimitry Andric : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1675f22ef01cSRoman Divacky if (isLd) {
1676f22ef01cSRoman Divacky BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1677f22ef01cSRoman Divacky .addReg(BaseReg, getKillRegState(BaseKill))
1678f22ef01cSRoman Divacky .addImm(Pred).addReg(PredReg)
1679f22ef01cSRoman Divacky .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1680f22ef01cSRoman Divacky .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1681f22ef01cSRoman Divacky ++NumLDRD2LDM;
1682f22ef01cSRoman Divacky } else {
1683f22ef01cSRoman Divacky BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1684f22ef01cSRoman Divacky .addReg(BaseReg, getKillRegState(BaseKill))
1685f22ef01cSRoman Divacky .addImm(Pred).addReg(PredReg)
1686f22ef01cSRoman Divacky .addReg(EvenReg,
1687f22ef01cSRoman Divacky getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1688f22ef01cSRoman Divacky .addReg(OddReg,
1689f22ef01cSRoman Divacky getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1690f22ef01cSRoman Divacky ++NumSTRD2STM;
1691f22ef01cSRoman Divacky }
1692f22ef01cSRoman Divacky } else {
1693f22ef01cSRoman Divacky // Split into two instructions.
1694f22ef01cSRoman Divacky unsigned NewOpc = (isLd)
16952754fe60SDimitry Andric ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
16962754fe60SDimitry Andric : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1697dff0c46cSDimitry Andric // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1698dff0c46cSDimitry Andric // so adjust and use t2LDRi12 here for that.
1699dff0c46cSDimitry Andric unsigned NewOpc2 = (isLd)
1700dff0c46cSDimitry Andric ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1701dff0c46cSDimitry Andric : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
17022cab237bSDimitry Andric // If this is a load, make sure the first load does not clobber the base
17032cab237bSDimitry Andric // register before the second load reads it.
17042cab237bSDimitry Andric if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
17052754fe60SDimitry Andric assert(!TRI->regsOverlap(OddReg, BaseReg));
17062cab237bSDimitry Andric InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
17072cab237bSDimitry Andric false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
17082cab237bSDimitry Andric InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
17092cab237bSDimitry Andric false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
1710f22ef01cSRoman Divacky } else {
1711f22ef01cSRoman Divacky if (OddReg == EvenReg && EvenDeadKill) {
1712ffd1746dSEd Schouten // If the two source operands are the same, the kill marker is
1713ffd1746dSEd Schouten // probably on the first one. e.g.
17142cab237bSDimitry Andric // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
1715f22ef01cSRoman Divacky EvenDeadKill = false;
1716f22ef01cSRoman Divacky OddDeadKill = true;
1717f22ef01cSRoman Divacky }
1718dff0c46cSDimitry Andric // Never kill the base register in the first instruction.
1719dff0c46cSDimitry Andric if (EvenReg == BaseReg)
1720dff0c46cSDimitry Andric EvenDeadKill = false;
17212cab237bSDimitry Andric InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
17222cab237bSDimitry Andric EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
17232cab237bSDimitry Andric InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
17242cab237bSDimitry Andric OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
1725f22ef01cSRoman Divacky }
1726f22ef01cSRoman Divacky if (isLd)
1727f22ef01cSRoman Divacky ++NumLDRD2LDR;
1728f22ef01cSRoman Divacky else
1729f22ef01cSRoman Divacky ++NumSTRD2STR;
1730f22ef01cSRoman Divacky }
1731f22ef01cSRoman Divacky
1732875ed548SDimitry Andric MBBI = MBB.erase(MBBI);
1733ffd1746dSEd Schouten return true;
1734f22ef01cSRoman Divacky }
1735f22ef01cSRoman Divacky
173697bc6c73SDimitry Andric /// An optimization pass to turn multiple LDR / STR ops of the same base and
173797bc6c73SDimitry Andric /// incrementing offset into LDM / STM ops.
LoadStoreMultipleOpti(MachineBasicBlock & MBB)1738f22ef01cSRoman Divacky bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1739f22ef01cSRoman Divacky MemOpQueue MemOps;
1740f22ef01cSRoman Divacky unsigned CurrBase = 0;
1741ff0cc061SDimitry Andric unsigned CurrOpc = ~0u;
1742f22ef01cSRoman Divacky ARMCC::CondCodes CurrPred = ARMCC::AL;
1743f22ef01cSRoman Divacky unsigned Position = 0;
1744875ed548SDimitry Andric assert(Candidates.size() == 0);
17457d523365SDimitry Andric assert(MergeBaseCandidates.size() == 0);
1746875ed548SDimitry Andric LiveRegsValid = false;
1747f22ef01cSRoman Divacky
1748875ed548SDimitry Andric for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1749875ed548SDimitry Andric I = MBBI) {
1750875ed548SDimitry Andric // The instruction in front of the iterator is the one we look at.
1751875ed548SDimitry Andric MBBI = std::prev(I);
1752f22ef01cSRoman Divacky if (FixInvalidRegPairOp(MBB, MBBI))
1753f22ef01cSRoman Divacky continue;
1754875ed548SDimitry Andric ++Position;
1755f22ef01cSRoman Divacky
17567d523365SDimitry Andric if (isMemoryOp(*MBBI)) {
1757ff0cc061SDimitry Andric unsigned Opcode = MBBI->getOpcode();
1758ffd1746dSEd Schouten const MachineOperand &MO = MBBI->getOperand(0);
1759ffd1746dSEd Schouten unsigned Reg = MO.getReg();
1760875ed548SDimitry Andric unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1761f22ef01cSRoman Divacky unsigned PredReg = 0;
17623ca95b02SDimitry Andric ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
17633ca95b02SDimitry Andric int Offset = getMemoryOpOffset(*MBBI);
1764875ed548SDimitry Andric if (CurrBase == 0) {
1765875ed548SDimitry Andric // Start of a new chain.
1766875ed548SDimitry Andric CurrBase = Base;
1767875ed548SDimitry Andric CurrOpc = Opcode;
1768875ed548SDimitry Andric CurrPred = Pred;
17693ca95b02SDimitry Andric MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1770875ed548SDimitry Andric continue;
1771875ed548SDimitry Andric }
1772875ed548SDimitry Andric // Note: No need to match PredReg in the next if.
1773875ed548SDimitry Andric if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1774284c1978SDimitry Andric // Watch out for:
1775284c1978SDimitry Andric // r4 := ldr [r0, #8]
1776284c1978SDimitry Andric // r4 := ldr [r0, #4]
1777875ed548SDimitry Andric // or
1778875ed548SDimitry Andric // r0 := ldr [r0]
1779875ed548SDimitry Andric // If a load overrides the base register or a register loaded by
1780875ed548SDimitry Andric // another load in our chain, we cannot take this instruction.
1781284c1978SDimitry Andric bool Overlap = false;
1782875ed548SDimitry Andric if (isLoadSingle(Opcode)) {
1783875ed548SDimitry Andric Overlap = (Base == Reg);
1784875ed548SDimitry Andric if (!Overlap) {
1785875ed548SDimitry Andric for (const MemOpQueueEntry &E : MemOps) {
1786875ed548SDimitry Andric if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1787284c1978SDimitry Andric Overlap = true;
1788284c1978SDimitry Andric break;
1789284c1978SDimitry Andric }
1790284c1978SDimitry Andric }
1791875ed548SDimitry Andric }
1792f22ef01cSRoman Divacky }
1793f22ef01cSRoman Divacky
1794875ed548SDimitry Andric if (!Overlap) {
1795875ed548SDimitry Andric // Check offset and sort memory operation into the current chain.
1796f22ef01cSRoman Divacky if (Offset > MemOps.back().Offset) {
17973ca95b02SDimitry Andric MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
1798875ed548SDimitry Andric continue;
1799f22ef01cSRoman Divacky } else {
1800875ed548SDimitry Andric MemOpQueue::iterator MI, ME;
1801875ed548SDimitry Andric for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1802875ed548SDimitry Andric if (Offset < MI->Offset) {
1803875ed548SDimitry Andric // Found a place to insert.
1804f22ef01cSRoman Divacky break;
1805875ed548SDimitry Andric }
1806875ed548SDimitry Andric if (Offset == MI->Offset) {
1807875ed548SDimitry Andric // Collision, abort.
1808875ed548SDimitry Andric MI = ME;
1809f22ef01cSRoman Divacky break;
1810f22ef01cSRoman Divacky }
1811f22ef01cSRoman Divacky }
1812875ed548SDimitry Andric if (MI != MemOps.end()) {
18133ca95b02SDimitry Andric MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
1814875ed548SDimitry Andric continue;
1815f22ef01cSRoman Divacky }
1816f22ef01cSRoman Divacky }
1817f22ef01cSRoman Divacky }
1818f22ef01cSRoman Divacky }
1819f22ef01cSRoman Divacky
1820875ed548SDimitry Andric // Don't advance the iterator; The op will start a new chain next.
1821875ed548SDimitry Andric MBBI = I;
1822875ed548SDimitry Andric --Position;
1823875ed548SDimitry Andric // Fallthrough to look into existing chain.
18244ba319b5SDimitry Andric } else if (MBBI->isDebugInstr()) {
1825875ed548SDimitry Andric continue;
18267d523365SDimitry Andric } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
18277d523365SDimitry Andric MBBI->getOpcode() == ARM::t2STRDi8) {
18287d523365SDimitry Andric // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
18297d523365SDimitry Andric // remember them because we may still be able to merge add/sub into them.
18303ca95b02SDimitry Andric MergeBaseCandidates.push_back(&*MBBI);
18317d523365SDimitry Andric }
18327d523365SDimitry Andric
1833875ed548SDimitry Andric // If we are here then the chain is broken; Extract candidates for a merge.
1834875ed548SDimitry Andric if (MemOps.size() > 0) {
1835875ed548SDimitry Andric FormCandidates(MemOps);
1836875ed548SDimitry Andric // Reset for the next chain.
1837f22ef01cSRoman Divacky CurrBase = 0;
1838ff0cc061SDimitry Andric CurrOpc = ~0u;
1839f22ef01cSRoman Divacky CurrPred = ARMCC::AL;
1840f22ef01cSRoman Divacky MemOps.clear();
1841f22ef01cSRoman Divacky }
1842875ed548SDimitry Andric }
1843875ed548SDimitry Andric if (MemOps.size() > 0)
1844875ed548SDimitry Andric FormCandidates(MemOps);
1845f22ef01cSRoman Divacky
1846875ed548SDimitry Andric // Sort candidates so they get processed from end to begin of the basic
1847875ed548SDimitry Andric // block later; This is necessary for liveness calculation.
1848875ed548SDimitry Andric auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1849875ed548SDimitry Andric return M0->InsertPos < M1->InsertPos;
1850875ed548SDimitry Andric };
1851*b5893f02SDimitry Andric llvm::sort(Candidates, LessThan);
1852875ed548SDimitry Andric
1853875ed548SDimitry Andric // Go through list of candidates and merge.
1854875ed548SDimitry Andric bool Changed = false;
1855875ed548SDimitry Andric for (const MergeCandidate *Candidate : Candidates) {
1856875ed548SDimitry Andric if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1857875ed548SDimitry Andric MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1858875ed548SDimitry Andric // Merge preceding/trailing base inc/dec into the merged op.
1859875ed548SDimitry Andric if (Merged) {
1860875ed548SDimitry Andric Changed = true;
1861875ed548SDimitry Andric unsigned Opcode = Merged->getOpcode();
18627d523365SDimitry Andric if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
18637d523365SDimitry Andric MergeBaseUpdateLSDouble(*Merged);
18647d523365SDimitry Andric else
1865875ed548SDimitry Andric MergeBaseUpdateLSMultiple(Merged);
1866875ed548SDimitry Andric } else {
1867875ed548SDimitry Andric for (MachineInstr *MI : Candidate->Instrs) {
1868875ed548SDimitry Andric if (MergeBaseUpdateLoadStore(MI))
1869875ed548SDimitry Andric Changed = true;
1870f22ef01cSRoman Divacky }
1871f22ef01cSRoman Divacky }
1872875ed548SDimitry Andric } else {
1873875ed548SDimitry Andric assert(Candidate->Instrs.size() == 1);
1874875ed548SDimitry Andric if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1875875ed548SDimitry Andric Changed = true;
1876f22ef01cSRoman Divacky }
1877875ed548SDimitry Andric }
1878875ed548SDimitry Andric Candidates.clear();
18797d523365SDimitry Andric // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
18807d523365SDimitry Andric for (MachineInstr *MI : MergeBaseCandidates)
18817d523365SDimitry Andric MergeBaseUpdateLSDouble(*MI);
18827d523365SDimitry Andric MergeBaseCandidates.clear();
1883875ed548SDimitry Andric
1884875ed548SDimitry Andric return Changed;
1885f22ef01cSRoman Divacky }
1886f22ef01cSRoman Divacky
188797bc6c73SDimitry Andric /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
188897bc6c73SDimitry Andric /// into the preceding stack restore so it directly restore the value of LR
188997bc6c73SDimitry Andric /// into pc.
1890f22ef01cSRoman Divacky /// ldmfd sp!, {..., lr}
1891f22ef01cSRoman Divacky /// bx lr
1892f22ef01cSRoman Divacky /// or
1893f22ef01cSRoman Divacky /// ldmfd sp!, {..., lr}
1894f22ef01cSRoman Divacky /// mov pc, lr
1895f22ef01cSRoman Divacky /// =>
1896f22ef01cSRoman Divacky /// ldmfd sp!, {..., pc}
MergeReturnIntoLDM(MachineBasicBlock & MBB)1897f22ef01cSRoman Divacky bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
189891bc56edSDimitry Andric // Thumb1 LDM doesn't allow high registers.
189991bc56edSDimitry Andric if (isThumb1) return false;
1900f22ef01cSRoman Divacky if (MBB.empty()) return false;
1901f22ef01cSRoman Divacky
19022754fe60SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1903d88c1a5aSDimitry Andric if (MBBI != MBB.begin() && MBBI != MBB.end() &&
1904f22ef01cSRoman Divacky (MBBI->getOpcode() == ARM::BX_RET ||
1905f22ef01cSRoman Divacky MBBI->getOpcode() == ARM::tBX_RET ||
1906f22ef01cSRoman Divacky MBBI->getOpcode() == ARM::MOVPCLR)) {
19077d523365SDimitry Andric MachineBasicBlock::iterator PrevI = std::prev(MBBI);
19084ba319b5SDimitry Andric // Ignore any debug instructions.
19094ba319b5SDimitry Andric while (PrevI->isDebugInstr() && PrevI != MBB.begin())
19107d523365SDimitry Andric --PrevI;
19113ca95b02SDimitry Andric MachineInstr &PrevMI = *PrevI;
19123ca95b02SDimitry Andric unsigned Opcode = PrevMI.getOpcode();
19132754fe60SDimitry Andric if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
19142754fe60SDimitry Andric Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
19152754fe60SDimitry Andric Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
19163ca95b02SDimitry Andric MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
1917f22ef01cSRoman Divacky if (MO.getReg() != ARM::LR)
1918f22ef01cSRoman Divacky return false;
19192754fe60SDimitry Andric unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
19202754fe60SDimitry Andric assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
19212754fe60SDimitry Andric Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
19223ca95b02SDimitry Andric PrevMI.setDesc(TII->get(NewOpc));
1923f22ef01cSRoman Divacky MO.setReg(ARM::PC);
19243ca95b02SDimitry Andric PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
1925f22ef01cSRoman Divacky MBB.erase(MBBI);
19262cab237bSDimitry Andric // We now restore LR into PC so it is not live-out of the return block
19272cab237bSDimitry Andric // anymore: Clear the CSI Restored bit.
19282cab237bSDimitry Andric MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
19292cab237bSDimitry Andric // CSI should be fixed after PrologEpilog Insertion
19302cab237bSDimitry Andric assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
19312cab237bSDimitry Andric for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
19322cab237bSDimitry Andric if (Info.getReg() == ARM::LR) {
19332cab237bSDimitry Andric Info.setRestored(false);
19342cab237bSDimitry Andric break;
19352cab237bSDimitry Andric }
19362cab237bSDimitry Andric }
1937f22ef01cSRoman Divacky return true;
1938f22ef01cSRoman Divacky }
1939f22ef01cSRoman Divacky }
1940f22ef01cSRoman Divacky return false;
1941f22ef01cSRoman Divacky }
1942f22ef01cSRoman Divacky
CombineMovBx(MachineBasicBlock & MBB)19437d523365SDimitry Andric bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
19447d523365SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
19457d523365SDimitry Andric if (MBBI == MBB.begin() || MBBI == MBB.end() ||
19467d523365SDimitry Andric MBBI->getOpcode() != ARM::tBX_RET)
19477d523365SDimitry Andric return false;
19487d523365SDimitry Andric
19497d523365SDimitry Andric MachineBasicBlock::iterator Prev = MBBI;
19507d523365SDimitry Andric --Prev;
19517d523365SDimitry Andric if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
19527d523365SDimitry Andric return false;
19537d523365SDimitry Andric
19547d523365SDimitry Andric for (auto Use : Prev->uses())
19557d523365SDimitry Andric if (Use.isKill()) {
1956d4419f6fSDimitry Andric assert(STI->hasV4TOps());
19577a7e6055SDimitry Andric BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
19587a7e6055SDimitry Andric .addReg(Use.getReg(), RegState::Kill)
19597a7e6055SDimitry Andric .add(predOps(ARMCC::AL))
19603ca95b02SDimitry Andric .copyImplicitOps(*MBBI);
19617d523365SDimitry Andric MBB.erase(MBBI);
19627d523365SDimitry Andric MBB.erase(Prev);
19637d523365SDimitry Andric return true;
19647d523365SDimitry Andric }
19657d523365SDimitry Andric
19667d523365SDimitry Andric llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
19677d523365SDimitry Andric }
19687d523365SDimitry Andric
runOnMachineFunction(MachineFunction & Fn)1969f22ef01cSRoman Divacky bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
19702cab237bSDimitry Andric if (skipFunction(Fn.getFunction()))
19713ca95b02SDimitry Andric return false;
19723ca95b02SDimitry Andric
1973875ed548SDimitry Andric MF = &Fn;
1974ff0cc061SDimitry Andric STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1975ff0cc061SDimitry Andric TL = STI->getTargetLowering();
1976f22ef01cSRoman Divacky AFI = Fn.getInfo<ARMFunctionInfo>();
1977ff0cc061SDimitry Andric TII = STI->getInstrInfo();
1978ff0cc061SDimitry Andric TRI = STI->getRegisterInfo();
19797d523365SDimitry Andric
1980875ed548SDimitry Andric RegClassInfoValid = false;
1981f22ef01cSRoman Divacky isThumb2 = AFI->isThumb2Function();
198291bc56edSDimitry Andric isThumb1 = AFI->isThumbFunction() && !isThumb2;
198391bc56edSDimitry Andric
1984f22ef01cSRoman Divacky bool Modified = false;
1985f22ef01cSRoman Divacky for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1986f22ef01cSRoman Divacky ++MFI) {
1987f22ef01cSRoman Divacky MachineBasicBlock &MBB = *MFI;
1988f22ef01cSRoman Divacky Modified |= LoadStoreMultipleOpti(MBB);
1989ff0cc061SDimitry Andric if (STI->hasV5TOps())
1990f22ef01cSRoman Divacky Modified |= MergeReturnIntoLDM(MBB);
19917d523365SDimitry Andric if (isThumb1)
19927d523365SDimitry Andric Modified |= CombineMovBx(MBB);
1993f22ef01cSRoman Divacky }
1994f22ef01cSRoman Divacky
1995875ed548SDimitry Andric Allocator.DestroyAll();
1996f22ef01cSRoman Divacky return Modified;
1997f22ef01cSRoman Divacky }
1998f22ef01cSRoman Divacky
19997d523365SDimitry Andric #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
20007d523365SDimitry Andric "ARM pre- register allocation load / store optimization pass"
20017d523365SDimitry Andric
2002f22ef01cSRoman Divacky namespace {
20032cab237bSDimitry Andric
200497bc6c73SDimitry Andric /// Pre- register allocation pass that move load / stores from consecutive
200597bc6c73SDimitry Andric /// locations close to make it more likely they will be combined later.
2006f22ef01cSRoman Divacky struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
2007f22ef01cSRoman Divacky static char ID;
2008f22ef01cSRoman Divacky
20097a7e6055SDimitry Andric AliasAnalysis *AA;
20103861d79fSDimitry Andric const DataLayout *TD;
2011f22ef01cSRoman Divacky const TargetInstrInfo *TII;
2012f22ef01cSRoman Divacky const TargetRegisterInfo *TRI;
2013f22ef01cSRoman Divacky const ARMSubtarget *STI;
2014f22ef01cSRoman Divacky MachineRegisterInfo *MRI;
2015f22ef01cSRoman Divacky MachineFunction *MF;
2016f22ef01cSRoman Divacky
ARMPreAllocLoadStoreOpt__anon9023e32a0311::ARMPreAllocLoadStoreOpt20172cab237bSDimitry Andric ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
20182cab237bSDimitry Andric
201991bc56edSDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override;
2020f22ef01cSRoman Divacky
getPassName__anon9023e32a0311::ARMPreAllocLoadStoreOpt2021d88c1a5aSDimitry Andric StringRef getPassName() const override {
20227d523365SDimitry Andric return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
2023f22ef01cSRoman Divacky }
2024f22ef01cSRoman Divacky
getAnalysisUsage__anon9023e32a0311::ARMPreAllocLoadStoreOpt20252cab237bSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
20267a7e6055SDimitry Andric AU.addRequired<AAResultsWrapperPass>();
20277a7e6055SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
20287a7e6055SDimitry Andric }
20297a7e6055SDimitry Andric
2030f22ef01cSRoman Divacky private:
2031f22ef01cSRoman Divacky bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
2032f22ef01cSRoman Divacky unsigned &NewOpc, unsigned &EvenReg,
2033f22ef01cSRoman Divacky unsigned &OddReg, unsigned &BaseReg,
20342754fe60SDimitry Andric int &Offset,
2035f22ef01cSRoman Divacky unsigned &PredReg, ARMCC::CondCodes &Pred,
2036f22ef01cSRoman Divacky bool &isT2);
2037f22ef01cSRoman Divacky bool RescheduleOps(MachineBasicBlock *MBB,
2038f785676fSDimitry Andric SmallVectorImpl<MachineInstr *> &Ops,
2039f22ef01cSRoman Divacky unsigned Base, bool isLd,
2040f22ef01cSRoman Divacky DenseMap<MachineInstr*, unsigned> &MI2LocMap);
2041f22ef01cSRoman Divacky bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
2042f22ef01cSRoman Divacky };
20432cab237bSDimitry Andric
20442cab237bSDimitry Andric } // end anonymous namespace
20452cab237bSDimitry Andric
2046f22ef01cSRoman Divacky char ARMPreAllocLoadStoreOpt::ID = 0;
2047f22ef01cSRoman Divacky
20483ca95b02SDimitry Andric INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
20497d523365SDimitry Andric ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
20507d523365SDimitry Andric
runOnMachineFunction(MachineFunction & Fn)2051f22ef01cSRoman Divacky bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
20522cab237bSDimitry Andric if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
20533ca95b02SDimitry Andric return false;
20543ca95b02SDimitry Andric
20557d523365SDimitry Andric TD = &Fn.getDataLayout();
205639d628a0SDimitry Andric STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
2057ff0cc061SDimitry Andric TII = STI->getInstrInfo();
2058ff0cc061SDimitry Andric TRI = STI->getRegisterInfo();
2059f22ef01cSRoman Divacky MRI = &Fn.getRegInfo();
2060f22ef01cSRoman Divacky MF = &Fn;
20617a7e6055SDimitry Andric AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2062f22ef01cSRoman Divacky
2063f22ef01cSRoman Divacky bool Modified = false;
20647d523365SDimitry Andric for (MachineBasicBlock &MFI : Fn)
20657d523365SDimitry Andric Modified |= RescheduleLoadStoreInstrs(&MFI);
2066f22ef01cSRoman Divacky
2067f22ef01cSRoman Divacky return Modified;
2068f22ef01cSRoman Divacky }
2069f22ef01cSRoman Divacky
IsSafeAndProfitableToMove(bool isLd,unsigned Base,MachineBasicBlock::iterator I,MachineBasicBlock::iterator E,SmallPtrSetImpl<MachineInstr * > & MemOps,SmallSet<unsigned,4> & MemRegs,const TargetRegisterInfo * TRI,AliasAnalysis * AA)2070f22ef01cSRoman Divacky static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
2071f22ef01cSRoman Divacky MachineBasicBlock::iterator I,
2072f22ef01cSRoman Divacky MachineBasicBlock::iterator E,
207339d628a0SDimitry Andric SmallPtrSetImpl<MachineInstr*> &MemOps,
2074f22ef01cSRoman Divacky SmallSet<unsigned, 4> &MemRegs,
20757a7e6055SDimitry Andric const TargetRegisterInfo *TRI,
20767a7e6055SDimitry Andric AliasAnalysis *AA) {
2077f22ef01cSRoman Divacky // Are there stores / loads / calls between them?
2078f22ef01cSRoman Divacky SmallSet<unsigned, 4> AddedRegPressure;
2079f22ef01cSRoman Divacky while (++I != E) {
20804ba319b5SDimitry Andric if (I->isDebugInstr() || MemOps.count(&*I))
2081f22ef01cSRoman Divacky continue;
2082dff0c46cSDimitry Andric if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
2083f22ef01cSRoman Divacky return false;
20847a7e6055SDimitry Andric if (I->mayStore() || (!isLd && I->mayLoad()))
20857a7e6055SDimitry Andric for (MachineInstr *MemOp : MemOps)
20867a7e6055SDimitry Andric if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
2087f22ef01cSRoman Divacky return false;
2088f22ef01cSRoman Divacky for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2089f22ef01cSRoman Divacky MachineOperand &MO = I->getOperand(j);
2090f22ef01cSRoman Divacky if (!MO.isReg())
2091f22ef01cSRoman Divacky continue;
2092f22ef01cSRoman Divacky unsigned Reg = MO.getReg();
2093f22ef01cSRoman Divacky if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2094f22ef01cSRoman Divacky return false;
2095f22ef01cSRoman Divacky if (Reg != Base && !MemRegs.count(Reg))
2096f22ef01cSRoman Divacky AddedRegPressure.insert(Reg);
2097f22ef01cSRoman Divacky }
2098f22ef01cSRoman Divacky }
2099f22ef01cSRoman Divacky
2100f22ef01cSRoman Divacky // Estimate register pressure increase due to the transformation.
2101f22ef01cSRoman Divacky if (MemRegs.size() <= 4)
2102f22ef01cSRoman Divacky // Ok if we are moving small number of instructions.
2103f22ef01cSRoman Divacky return true;
2104f22ef01cSRoman Divacky return AddedRegPressure.size() <= MemRegs.size() * 2;
2105f22ef01cSRoman Divacky }
2106f22ef01cSRoman Divacky
2107f22ef01cSRoman Divacky bool
CanFormLdStDWord(MachineInstr * Op0,MachineInstr * Op1,DebugLoc & dl,unsigned & NewOpc,unsigned & FirstReg,unsigned & SecondReg,unsigned & BaseReg,int & Offset,unsigned & PredReg,ARMCC::CondCodes & Pred,bool & isT2)2108f22ef01cSRoman Divacky ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
210997bc6c73SDimitry Andric DebugLoc &dl, unsigned &NewOpc,
211097bc6c73SDimitry Andric unsigned &FirstReg,
211197bc6c73SDimitry Andric unsigned &SecondReg,
211297bc6c73SDimitry Andric unsigned &BaseReg, int &Offset,
211397bc6c73SDimitry Andric unsigned &PredReg,
2114f22ef01cSRoman Divacky ARMCC::CondCodes &Pred,
2115f22ef01cSRoman Divacky bool &isT2) {
2116f22ef01cSRoman Divacky // Make sure we're allowed to generate LDRD/STRD.
2117f22ef01cSRoman Divacky if (!STI->hasV5TEOps())
2118f22ef01cSRoman Divacky return false;
2119f22ef01cSRoman Divacky
2120f22ef01cSRoman Divacky // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2121f22ef01cSRoman Divacky unsigned Scale = 1;
2122f22ef01cSRoman Divacky unsigned Opcode = Op0->getOpcode();
212391bc56edSDimitry Andric if (Opcode == ARM::LDRi12) {
2124f22ef01cSRoman Divacky NewOpc = ARM::LDRD;
212591bc56edSDimitry Andric } else if (Opcode == ARM::STRi12) {
2126f22ef01cSRoman Divacky NewOpc = ARM::STRD;
212791bc56edSDimitry Andric } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2128f22ef01cSRoman Divacky NewOpc = ARM::t2LDRDi8;
2129f22ef01cSRoman Divacky Scale = 4;
2130f22ef01cSRoman Divacky isT2 = true;
2131f22ef01cSRoman Divacky } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2132f22ef01cSRoman Divacky NewOpc = ARM::t2STRDi8;
2133f22ef01cSRoman Divacky Scale = 4;
2134f22ef01cSRoman Divacky isT2 = true;
213591bc56edSDimitry Andric } else {
2136f22ef01cSRoman Divacky return false;
213791bc56edSDimitry Andric }
2138f22ef01cSRoman Divacky
21392754fe60SDimitry Andric // Make sure the base address satisfies i64 ld / st alignment requirement.
2140f785676fSDimitry Andric // At the moment, we ignore the memoryoperand's value.
2141f785676fSDimitry Andric // If we want to use AliasAnalysis, we should check it accordingly.
2142f22ef01cSRoman Divacky if (!Op0->hasOneMemOperand() ||
2143f22ef01cSRoman Divacky (*Op0->memoperands_begin())->isVolatile())
2144f22ef01cSRoman Divacky return false;
2145f22ef01cSRoman Divacky
2146f22ef01cSRoman Divacky unsigned Align = (*Op0->memoperands_begin())->getAlignment();
21472cab237bSDimitry Andric const Function &Func = MF->getFunction();
2148f22ef01cSRoman Divacky unsigned ReqAlign = STI->hasV6Ops()
21492cab237bSDimitry Andric ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
2150f22ef01cSRoman Divacky : 8; // Pre-v6 need 8-byte align
2151f22ef01cSRoman Divacky if (Align < ReqAlign)
2152f22ef01cSRoman Divacky return false;
2153f22ef01cSRoman Divacky
2154f22ef01cSRoman Divacky // Then make sure the immediate offset fits.
21553ca95b02SDimitry Andric int OffImm = getMemoryOpOffset(*Op0);
2156f22ef01cSRoman Divacky if (isT2) {
2157f22ef01cSRoman Divacky int Limit = (1 << 8) * Scale;
21583b0f4066SDimitry Andric if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2159f22ef01cSRoman Divacky return false;
2160f22ef01cSRoman Divacky Offset = OffImm;
2161f22ef01cSRoman Divacky } else {
2162f22ef01cSRoman Divacky ARM_AM::AddrOpc AddSub = ARM_AM::add;
2163f22ef01cSRoman Divacky if (OffImm < 0) {
2164f22ef01cSRoman Divacky AddSub = ARM_AM::sub;
2165f22ef01cSRoman Divacky OffImm = - OffImm;
2166f22ef01cSRoman Divacky }
2167f22ef01cSRoman Divacky int Limit = (1 << 8) * Scale;
2168f22ef01cSRoman Divacky if (OffImm >= Limit || (OffImm & (Scale-1)))
2169f22ef01cSRoman Divacky return false;
2170f22ef01cSRoman Divacky Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2171f22ef01cSRoman Divacky }
217297bc6c73SDimitry Andric FirstReg = Op0->getOperand(0).getReg();
217397bc6c73SDimitry Andric SecondReg = Op1->getOperand(0).getReg();
217497bc6c73SDimitry Andric if (FirstReg == SecondReg)
2175f22ef01cSRoman Divacky return false;
2176f22ef01cSRoman Divacky BaseReg = Op0->getOperand(1).getReg();
21773ca95b02SDimitry Andric Pred = getInstrPredicate(*Op0, PredReg);
2178f22ef01cSRoman Divacky dl = Op0->getDebugLoc();
2179f22ef01cSRoman Divacky return true;
2180f22ef01cSRoman Divacky }
2181f22ef01cSRoman Divacky
RescheduleOps(MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Ops,unsigned Base,bool isLd,DenseMap<MachineInstr *,unsigned> & MI2LocMap)2182f22ef01cSRoman Divacky bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2183f785676fSDimitry Andric SmallVectorImpl<MachineInstr *> &Ops,
2184f22ef01cSRoman Divacky unsigned Base, bool isLd,
2185f22ef01cSRoman Divacky DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2186f22ef01cSRoman Divacky bool RetVal = false;
2187f22ef01cSRoman Divacky
2188f22ef01cSRoman Divacky // Sort by offset (in reverse order).
2189*b5893f02SDimitry Andric llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
21903ca95b02SDimitry Andric int LOffset = getMemoryOpOffset(*LHS);
21913ca95b02SDimitry Andric int ROffset = getMemoryOpOffset(*RHS);
219291bc56edSDimitry Andric assert(LHS == RHS || LOffset != ROffset);
219391bc56edSDimitry Andric return LOffset > ROffset;
219491bc56edSDimitry Andric });
2195f22ef01cSRoman Divacky
2196f22ef01cSRoman Divacky // The loads / stores of the same base are in order. Scan them from first to
2197ffd1746dSEd Schouten // last and check for the following:
2198f22ef01cSRoman Divacky // 1. Any def of base.
2199f22ef01cSRoman Divacky // 2. Any gaps.
2200f22ef01cSRoman Divacky while (Ops.size() > 1) {
2201f22ef01cSRoman Divacky unsigned FirstLoc = ~0U;
2202f22ef01cSRoman Divacky unsigned LastLoc = 0;
220391bc56edSDimitry Andric MachineInstr *FirstOp = nullptr;
220491bc56edSDimitry Andric MachineInstr *LastOp = nullptr;
2205f22ef01cSRoman Divacky int LastOffset = 0;
2206f22ef01cSRoman Divacky unsigned LastOpcode = 0;
2207f22ef01cSRoman Divacky unsigned LastBytes = 0;
2208f22ef01cSRoman Divacky unsigned NumMove = 0;
2209f22ef01cSRoman Divacky for (int i = Ops.size() - 1; i >= 0; --i) {
22107a7e6055SDimitry Andric // Make sure each operation has the same kind.
2211f22ef01cSRoman Divacky MachineInstr *Op = Ops[i];
22127a7e6055SDimitry Andric unsigned LSMOpcode
22137a7e6055SDimitry Andric = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
22147a7e6055SDimitry Andric if (LastOpcode && LSMOpcode != LastOpcode)
22157a7e6055SDimitry Andric break;
22167a7e6055SDimitry Andric
22177a7e6055SDimitry Andric // Check that we have a continuous set of offsets.
22187a7e6055SDimitry Andric int Offset = getMemoryOpOffset(*Op);
22197a7e6055SDimitry Andric unsigned Bytes = getLSMultipleTransferSize(Op);
22207a7e6055SDimitry Andric if (LastBytes) {
22217a7e6055SDimitry Andric if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
22227a7e6055SDimitry Andric break;
22237a7e6055SDimitry Andric }
22247a7e6055SDimitry Andric
22257a7e6055SDimitry Andric // Don't try to reschedule too many instructions.
22267a7e6055SDimitry Andric if (NumMove == 8) // FIXME: Tune this limit.
22277a7e6055SDimitry Andric break;
22287a7e6055SDimitry Andric
22297a7e6055SDimitry Andric // Found a mergable instruction; save information about it.
22307a7e6055SDimitry Andric ++NumMove;
22317a7e6055SDimitry Andric LastOffset = Offset;
22327a7e6055SDimitry Andric LastBytes = Bytes;
22337a7e6055SDimitry Andric LastOpcode = LSMOpcode;
22347a7e6055SDimitry Andric
2235f22ef01cSRoman Divacky unsigned Loc = MI2LocMap[Op];
2236f22ef01cSRoman Divacky if (Loc <= FirstLoc) {
2237f22ef01cSRoman Divacky FirstLoc = Loc;
2238f22ef01cSRoman Divacky FirstOp = Op;
2239f22ef01cSRoman Divacky }
2240f22ef01cSRoman Divacky if (Loc >= LastLoc) {
2241f22ef01cSRoman Divacky LastLoc = Loc;
2242f22ef01cSRoman Divacky LastOp = Op;
2243f22ef01cSRoman Divacky }
2244f22ef01cSRoman Divacky }
2245f22ef01cSRoman Divacky
2246f22ef01cSRoman Divacky if (NumMove <= 1)
2247f22ef01cSRoman Divacky Ops.pop_back();
2248f22ef01cSRoman Divacky else {
2249f22ef01cSRoman Divacky SmallPtrSet<MachineInstr*, 4> MemOps;
2250f22ef01cSRoman Divacky SmallSet<unsigned, 4> MemRegs;
22517a7e6055SDimitry Andric for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
2252f22ef01cSRoman Divacky MemOps.insert(Ops[i]);
2253f22ef01cSRoman Divacky MemRegs.insert(Ops[i]->getOperand(0).getReg());
2254f22ef01cSRoman Divacky }
2255f22ef01cSRoman Divacky
2256f22ef01cSRoman Divacky // Be conservative, if the instructions are too far apart, don't
2257f22ef01cSRoman Divacky // move them. We want to limit the increase of register pressure.
2258f22ef01cSRoman Divacky bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2259f22ef01cSRoman Divacky if (DoMove)
2260f22ef01cSRoman Divacky DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
22617a7e6055SDimitry Andric MemOps, MemRegs, TRI, AA);
2262f22ef01cSRoman Divacky if (!DoMove) {
2263f22ef01cSRoman Divacky for (unsigned i = 0; i != NumMove; ++i)
2264f22ef01cSRoman Divacky Ops.pop_back();
2265f22ef01cSRoman Divacky } else {
2266f22ef01cSRoman Divacky // This is the new location for the loads / stores.
2267f22ef01cSRoman Divacky MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
22683ca95b02SDimitry Andric while (InsertPos != MBB->end() &&
22694ba319b5SDimitry Andric (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
2270f22ef01cSRoman Divacky ++InsertPos;
2271f22ef01cSRoman Divacky
2272f22ef01cSRoman Divacky // If we are moving a pair of loads / stores, see if it makes sense
2273f22ef01cSRoman Divacky // to try to allocate a pair of registers that can form register pairs.
2274f22ef01cSRoman Divacky MachineInstr *Op0 = Ops.back();
2275f22ef01cSRoman Divacky MachineInstr *Op1 = Ops[Ops.size()-2];
227697bc6c73SDimitry Andric unsigned FirstReg = 0, SecondReg = 0;
22772754fe60SDimitry Andric unsigned BaseReg = 0, PredReg = 0;
2278f22ef01cSRoman Divacky ARMCC::CondCodes Pred = ARMCC::AL;
2279f22ef01cSRoman Divacky bool isT2 = false;
2280f22ef01cSRoman Divacky unsigned NewOpc = 0;
2281f22ef01cSRoman Divacky int Offset = 0;
2282f22ef01cSRoman Divacky DebugLoc dl;
2283f22ef01cSRoman Divacky if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
228497bc6c73SDimitry Andric FirstReg, SecondReg, BaseReg,
2285f22ef01cSRoman Divacky Offset, PredReg, Pred, isT2)) {
2286f22ef01cSRoman Divacky Ops.pop_back();
2287f22ef01cSRoman Divacky Ops.pop_back();
2288f22ef01cSRoman Divacky
228917a519f9SDimitry Andric const MCInstrDesc &MCID = TII->get(NewOpc);
22907ae0e2c9SDimitry Andric const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
229197bc6c73SDimitry Andric MRI->constrainRegClass(FirstReg, TRC);
229297bc6c73SDimitry Andric MRI->constrainRegClass(SecondReg, TRC);
2293bd5abe19SDimitry Andric
2294f22ef01cSRoman Divacky // Form the pair instruction.
2295f22ef01cSRoman Divacky if (isLd) {
229617a519f9SDimitry Andric MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
229797bc6c73SDimitry Andric .addReg(FirstReg, RegState::Define)
229897bc6c73SDimitry Andric .addReg(SecondReg, RegState::Define)
2299f22ef01cSRoman Divacky .addReg(BaseReg);
23002754fe60SDimitry Andric // FIXME: We're converting from LDRi12 to an insn that still
23012754fe60SDimitry Andric // uses addrmode2, so we need an explicit offset reg. It should
23022754fe60SDimitry Andric // always by reg0 since we're transforming LDRi12s.
2303f22ef01cSRoman Divacky if (!isT2)
23042754fe60SDimitry Andric MIB.addReg(0);
2305f22ef01cSRoman Divacky MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2306*b5893f02SDimitry Andric MIB.cloneMergedMemRefs({Op0, Op1});
23074ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2308f22ef01cSRoman Divacky ++NumLDRDFormed;
2309f22ef01cSRoman Divacky } else {
231017a519f9SDimitry Andric MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
231197bc6c73SDimitry Andric .addReg(FirstReg)
231297bc6c73SDimitry Andric .addReg(SecondReg)
2313f22ef01cSRoman Divacky .addReg(BaseReg);
23142754fe60SDimitry Andric // FIXME: We're converting from LDRi12 to an insn that still
23152754fe60SDimitry Andric // uses addrmode2, so we need an explicit offset reg. It should
23162754fe60SDimitry Andric // always by reg0 since we're transforming STRi12s.
2317f22ef01cSRoman Divacky if (!isT2)
23182754fe60SDimitry Andric MIB.addReg(0);
2319f22ef01cSRoman Divacky MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2320*b5893f02SDimitry Andric MIB.cloneMergedMemRefs({Op0, Op1});
23214ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
2322f22ef01cSRoman Divacky ++NumSTRDFormed;
2323f22ef01cSRoman Divacky }
2324f22ef01cSRoman Divacky MBB->erase(Op0);
2325f22ef01cSRoman Divacky MBB->erase(Op1);
2326f22ef01cSRoman Divacky
232797bc6c73SDimitry Andric if (!isT2) {
2328f22ef01cSRoman Divacky // Add register allocation hints to form register pairs.
232997bc6c73SDimitry Andric MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
233097bc6c73SDimitry Andric MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
233197bc6c73SDimitry Andric }
2332f22ef01cSRoman Divacky } else {
2333f22ef01cSRoman Divacky for (unsigned i = 0; i != NumMove; ++i) {
2334f22ef01cSRoman Divacky MachineInstr *Op = Ops.back();
2335f22ef01cSRoman Divacky Ops.pop_back();
2336f22ef01cSRoman Divacky MBB->splice(InsertPos, MBB, Op);
2337f22ef01cSRoman Divacky }
2338f22ef01cSRoman Divacky }
2339f22ef01cSRoman Divacky
2340f22ef01cSRoman Divacky NumLdStMoved += NumMove;
2341f22ef01cSRoman Divacky RetVal = true;
2342f22ef01cSRoman Divacky }
2343f22ef01cSRoman Divacky }
2344f22ef01cSRoman Divacky }
2345f22ef01cSRoman Divacky
2346f22ef01cSRoman Divacky return RetVal;
2347f22ef01cSRoman Divacky }
2348f22ef01cSRoman Divacky
2349f22ef01cSRoman Divacky bool
RescheduleLoadStoreInstrs(MachineBasicBlock * MBB)2350f22ef01cSRoman Divacky ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2351f22ef01cSRoman Divacky bool RetVal = false;
2352f22ef01cSRoman Divacky
2353f22ef01cSRoman Divacky DenseMap<MachineInstr*, unsigned> MI2LocMap;
2354f22ef01cSRoman Divacky DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
2355f22ef01cSRoman Divacky DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
2356f22ef01cSRoman Divacky SmallVector<unsigned, 4> LdBases;
2357f22ef01cSRoman Divacky SmallVector<unsigned, 4> StBases;
2358f22ef01cSRoman Divacky
2359f22ef01cSRoman Divacky unsigned Loc = 0;
2360f22ef01cSRoman Divacky MachineBasicBlock::iterator MBBI = MBB->begin();
2361f22ef01cSRoman Divacky MachineBasicBlock::iterator E = MBB->end();
2362f22ef01cSRoman Divacky while (MBBI != E) {
2363f22ef01cSRoman Divacky for (; MBBI != E; ++MBBI) {
23643ca95b02SDimitry Andric MachineInstr &MI = *MBBI;
23653ca95b02SDimitry Andric if (MI.isCall() || MI.isTerminator()) {
2366f22ef01cSRoman Divacky // Stop at barriers.
2367f22ef01cSRoman Divacky ++MBBI;
2368f22ef01cSRoman Divacky break;
2369f22ef01cSRoman Divacky }
2370f22ef01cSRoman Divacky
23714ba319b5SDimitry Andric if (!MI.isDebugInstr())
23723ca95b02SDimitry Andric MI2LocMap[&MI] = ++Loc;
2373ffd1746dSEd Schouten
23743ca95b02SDimitry Andric if (!isMemoryOp(MI))
2375f22ef01cSRoman Divacky continue;
2376f22ef01cSRoman Divacky unsigned PredReg = 0;
2377dff0c46cSDimitry Andric if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2378f22ef01cSRoman Divacky continue;
2379f22ef01cSRoman Divacky
23803ca95b02SDimitry Andric int Opc = MI.getOpcode();
2381875ed548SDimitry Andric bool isLd = isLoadSingle(Opc);
23823ca95b02SDimitry Andric unsigned Base = MI.getOperand(1).getReg();
2383f22ef01cSRoman Divacky int Offset = getMemoryOpOffset(MI);
2384f22ef01cSRoman Divacky
2385f22ef01cSRoman Divacky bool StopHere = false;
2386f22ef01cSRoman Divacky if (isLd) {
2387f22ef01cSRoman Divacky DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
2388f22ef01cSRoman Divacky Base2LdsMap.find(Base);
2389f22ef01cSRoman Divacky if (BI != Base2LdsMap.end()) {
2390f22ef01cSRoman Divacky for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
23913ca95b02SDimitry Andric if (Offset == getMemoryOpOffset(*BI->second[i])) {
2392f22ef01cSRoman Divacky StopHere = true;
2393f22ef01cSRoman Divacky break;
2394f22ef01cSRoman Divacky }
2395f22ef01cSRoman Divacky }
2396f22ef01cSRoman Divacky if (!StopHere)
23973ca95b02SDimitry Andric BI->second.push_back(&MI);
2398f22ef01cSRoman Divacky } else {
23993ca95b02SDimitry Andric Base2LdsMap[Base].push_back(&MI);
2400f22ef01cSRoman Divacky LdBases.push_back(Base);
2401f22ef01cSRoman Divacky }
2402f22ef01cSRoman Divacky } else {
2403f22ef01cSRoman Divacky DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
2404f22ef01cSRoman Divacky Base2StsMap.find(Base);
2405f22ef01cSRoman Divacky if (BI != Base2StsMap.end()) {
2406f22ef01cSRoman Divacky for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
24073ca95b02SDimitry Andric if (Offset == getMemoryOpOffset(*BI->second[i])) {
2408f22ef01cSRoman Divacky StopHere = true;
2409f22ef01cSRoman Divacky break;
2410f22ef01cSRoman Divacky }
2411f22ef01cSRoman Divacky }
2412f22ef01cSRoman Divacky if (!StopHere)
24133ca95b02SDimitry Andric BI->second.push_back(&MI);
2414f22ef01cSRoman Divacky } else {
24153ca95b02SDimitry Andric Base2StsMap[Base].push_back(&MI);
2416f22ef01cSRoman Divacky StBases.push_back(Base);
2417f22ef01cSRoman Divacky }
2418f22ef01cSRoman Divacky }
2419f22ef01cSRoman Divacky
2420f22ef01cSRoman Divacky if (StopHere) {
2421f22ef01cSRoman Divacky // Found a duplicate (a base+offset combination that's seen earlier).
2422f22ef01cSRoman Divacky // Backtrack.
2423f22ef01cSRoman Divacky --Loc;
2424f22ef01cSRoman Divacky break;
2425f22ef01cSRoman Divacky }
2426f22ef01cSRoman Divacky }
2427f22ef01cSRoman Divacky
2428f22ef01cSRoman Divacky // Re-schedule loads.
2429f22ef01cSRoman Divacky for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2430f22ef01cSRoman Divacky unsigned Base = LdBases[i];
2431f785676fSDimitry Andric SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2432f22ef01cSRoman Divacky if (Lds.size() > 1)
2433f22ef01cSRoman Divacky RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2434f22ef01cSRoman Divacky }
2435f22ef01cSRoman Divacky
2436f22ef01cSRoman Divacky // Re-schedule stores.
2437f22ef01cSRoman Divacky for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2438f22ef01cSRoman Divacky unsigned Base = StBases[i];
2439f785676fSDimitry Andric SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2440f22ef01cSRoman Divacky if (Sts.size() > 1)
2441f22ef01cSRoman Divacky RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
2442f22ef01cSRoman Divacky }
2443f22ef01cSRoman Divacky
2444f22ef01cSRoman Divacky if (MBBI != E) {
2445f22ef01cSRoman Divacky Base2LdsMap.clear();
2446f22ef01cSRoman Divacky Base2StsMap.clear();
2447f22ef01cSRoman Divacky LdBases.clear();
2448f22ef01cSRoman Divacky StBases.clear();
2449f22ef01cSRoman Divacky }
2450f22ef01cSRoman Divacky }
2451f22ef01cSRoman Divacky
2452f22ef01cSRoman Divacky return RetVal;
2453f22ef01cSRoman Divacky }
2454f22ef01cSRoman Divacky
245597bc6c73SDimitry Andric /// Returns an instance of the load / store optimization pass.
createARMLoadStoreOptimizationPass(bool PreAlloc)2456f22ef01cSRoman Divacky FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2457f22ef01cSRoman Divacky if (PreAlloc)
2458f22ef01cSRoman Divacky return new ARMPreAllocLoadStoreOpt();
2459f22ef01cSRoman Divacky return new ARMLoadStoreOpt();
2460f22ef01cSRoman Divacky }
2461