1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <utility>
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "arm-ldst-opt"
50 
// Pass statistics, reported with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

// Forward declaration of the initializer emitted by the INITIALIZE_PASS macro
// below; called from the pass constructor.
namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
77 
namespace {
  /// Post-register-allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    // Per-function cached state, expected to be set up by runOnMachineFunction
    // (defined elsewhere in this file) before any of the helpers below run.
    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    // Liveness scratch state used to find free registers; only meaningful
    // while LiveRegsValid is true (see moveLiveRegsBefore / findFreeReg).
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    /// Arena and storage for the merge candidates found while scanning.
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
167 
// Register the pass under the command-line name "arm-load-store-opt".
INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
169 
170 static bool definesCPSR(const MachineInstr *MI) {
171   for (const auto &MO : MI->operands()) {
172     if (!MO.isReg())
173       continue;
174     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
175       // If the instruction has live CPSR def, then it's not safe to fold it
176       // into load / store.
177       return true;
178   }
179 
180   return false;
181 }
182 
183 static int getMemoryOpOffset(const MachineInstr *MI) {
184   unsigned Opcode = MI->getOpcode();
185   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
186   unsigned NumOperands = MI->getDesc().getNumOperands();
187   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
188 
189   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
190       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
191       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
192       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
193     return OffField;
194 
195   // Thumb1 immediate offsets are scaled by 4
196   if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
197       Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
198     return OffField * 4;
199 
200   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
201     : ARM_AM::getAM5Offset(OffField) * 4;
202   ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
203     : ARM_AM::getAM5Op(OffField);
204 
205   if (Op == ARM_AM::sub)
206     return -Offset;
207 
208   return Offset;
209 }
210 
211 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
212   return MI.getOperand(1);
213 }
214 
215 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
216   return MI.getOperand(0);
217 }
218 
/// Map a single load/store opcode plus an addressing submode onto the
/// corresponding load/store-multiple opcode, bumping the matching statistic.
/// Returns 0 where no non-writeback multiple form exists for the requested
/// submode (the VFP db variants only exist as _UPD opcodes).
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
303 
/// Return the addressing submode (increment/decrement, after/before) that a
/// load/store-multiple opcode encodes.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
357 
358 static bool isT1i32Load(unsigned Opc) {
359   return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
360 }
361 
362 static bool isT2i32Load(unsigned Opc) {
363   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
364 }
365 
366 static bool isi32Load(unsigned Opc) {
367   return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
368 }
369 
370 static bool isT1i32Store(unsigned Opc) {
371   return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
372 }
373 
374 static bool isT2i32Store(unsigned Opc) {
375   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
376 }
377 
378 static bool isi32Store(unsigned Opc) {
379   return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
380 }
381 
382 static bool isLoadSingle(unsigned Opc) {
383   return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
384 }
385 
/// Return the factor by which a word (4-byte) count must be multiplied to
/// express the same distance in this opcode's immediate units — the inverse
/// of the access's memory scale: word accesses encode word offsets (x1),
/// halfword accesses halfword offsets (x2), byte accesses byte offsets (x4).
/// Used by UpdateBaseRegUses to rewrite immediates after base writeback.
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
402 
/// Return the number of bytes transferred by load/store instruction \p MI,
/// or 0 for opcodes this pass does not recognize.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Load/store multiple: 4 bytes per register in the variadic register
    // list.  The list size is derived from the operand-count delta; the +1
    // presumably compensates for how the fixed operand count is recorded in
    // the instruction description -- TODO confirm against the .td files.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    // Same list-size computation, but D registers move 8 bytes each.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
445 
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
///
/// Walks forward from \p MBBI rewriting immediate offsets of instructions
/// that read \p Base; when an instruction cannot be rewritten, a
/// compensating SUBS (tSUBi8) of 4 * \p WordOffset is inserted before it and
/// the walk stops.  \p Pred / \p PredReg carry the predicate used for any
/// inserted compensation code.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // ADD/SUB immediates are in bytes; the writeback was WordOffset words.
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
542 
543 /// Return the first register of class \p RegClass that is not in \p Regs.
544 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
545   if (!RegClassInfoValid) {
546     RegClassInfo.runOnMachineFunction(*MF);
547     RegClassInfoValid = true;
548   }
549 
550   for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
551     if (!LiveRegs.contains(Reg))
552       return Reg;
553   return 0;
554 }
555 
556 /// Compute live registers just before instruction \p Before (in normal schedule
557 /// direction). Computes backwards so multiple queries in the same block must
558 /// come in reverse order.
559 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
560     MachineBasicBlock::const_iterator Before) {
561   // Initialize if we never queried in this block.
562   if (!LiveRegsValid) {
563     LiveRegs.init(TRI);
564     LiveRegs.addLiveOuts(&MBB, true);
565     LiveRegPos = MBB.end();
566     LiveRegsValid = true;
567   }
568   // Move backward just before the "Before" position.
569   while (LiveRegPos != Before) {
570     --LiveRegPos;
571     LiveRegs.stepBackward(*LiveRegPos);
572   }
573 }
574 
575 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
576                         unsigned Reg) {
577   for (const std::pair<unsigned, bool> &R : Regs)
578     if (R.first == Reg)
579       return true;
580   return false;
581 }
582 
583 /// Create and insert a LDM or STM with Base as base register and registers in
584 /// Regs as the register operands that would be loaded / stored.  It returns
585 /// true if the transformation is done.
586 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
587     MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
588     bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
589     DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
590   unsigned NumRegs = Regs.size();
591   assert(NumRegs > 1);
592 
593   // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
594   // Compute liveness information for that register to make the decision.
595   bool SafeToClobberCPSR = !isThumb1 ||
596     (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
597      MachineBasicBlock::LQR_Dead);
598 
599   bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
600 
601   // Exception: If the base register is in the input reglist, Thumb1 LDM is
602   // non-writeback.
603   // It's also not possible to merge an STR of the base register in Thumb1.
604   if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
605     assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
606     if (Opcode == ARM::tLDRi) {
607       Writeback = false;
608     } else if (Opcode == ARM::tSTRi) {
609       return nullptr;
610     }
611   }
612 
613   ARM_AM::AMSubMode Mode = ARM_AM::ia;
614   // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
615   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
616   bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
617 
618   if (Offset == 4 && haveIBAndDA) {
619     Mode = ARM_AM::ib;
620   } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
621     Mode = ARM_AM::da;
622   } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
623     // VLDM/VSTM do not support DB mode without also updating the base reg.
624     Mode = ARM_AM::db;
625   } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
626     // Check if this is a supported opcode before inserting instructions to
627     // calculate a new base register.
628     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
629 
630     // If starting offset isn't zero, insert a MI to materialize a new base.
631     // But only do so if it is cost effective, i.e. merging more than two
632     // loads / stores.
633     if (NumRegs <= 2)
634       return nullptr;
635 
636     // On Thumb1, it's not worth materializing a new base register without
637     // clobbering the CPSR (i.e. not using ADDS/SUBS).
638     if (!SafeToClobberCPSR)
639       return nullptr;
640 
641     unsigned NewBase;
642     if (isi32Load(Opcode)) {
643       // If it is a load, then just use one of the destination registers
644       // as the new base. Will no longer be writeback in Thumb1.
645       NewBase = Regs[NumRegs-1].first;
646       Writeback = false;
647     } else {
648       // Find a free register that we can use as scratch register.
649       moveLiveRegsBefore(MBB, InsertBefore);
650       // The merged instruction does not exist yet but will use several Regs if
651       // it is a Store.
652       if (!isLoadSingle(Opcode))
653         for (const std::pair<unsigned, bool> &R : Regs)
654           LiveRegs.addReg(R.first);
655 
656       NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
657       if (NewBase == 0)
658         return nullptr;
659     }
660 
661     int BaseOpc =
662       isThumb2 ? ARM::t2ADDri :
663       (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
664       (isThumb1 && Offset < 8) ? ARM::tADDi3 :
665       isThumb1 ? ARM::tADDi8  : ARM::ADDri;
666 
667     if (Offset < 0) {
668       Offset = - Offset;
669       BaseOpc =
670         isThumb2 ? ARM::t2SUBri :
671         (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
672         isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
673     }
674 
675     if (!TL->isLegalAddImmediate(Offset))
676       // FIXME: Try add with register operand?
677       return nullptr; // Probably not worth it then.
678 
679     // We can only append a kill flag to the add/sub input if the value is not
680     // used in the register list of the stm as well.
681     bool KillOldBase = BaseKill &&
682       (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
683 
684     if (isThumb1) {
685       // Thumb1: depending on immediate size, use either
686       //   ADDS NewBase, Base, #imm3
687       // or
688       //   MOV  NewBase, Base
689       //   ADDS NewBase, #imm8.
690       if (Base != NewBase &&
691           (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
692         // Need to insert a MOV to the new base first.
693         if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
694             !STI->hasV6Ops()) {
695           // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
696           if (Pred != ARMCC::AL)
697             return nullptr;
698           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
699             .addReg(Base, getKillRegState(KillOldBase));
700         } else
701           BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
702             .addReg(Base, getKillRegState(KillOldBase))
703             .addImm(Pred).addReg(PredReg);
704 
705         // The following ADDS/SUBS becomes an update.
706         Base = NewBase;
707         KillOldBase = true;
708       }
709       if (BaseOpc == ARM::tADDrSPi) {
710         assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
711         BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
712           .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
713           .addImm(Pred).addReg(PredReg);
714       } else
715         AddDefaultT1CC(
716           BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
717           .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
718           .addImm(Pred).addReg(PredReg);
719     } else {
720       BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
721         .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
722         .addImm(Pred).addReg(PredReg).addReg(0);
723     }
724     Base = NewBase;
725     BaseKill = true; // New base is always killed straight away.
726   }
727 
728   bool isDef = isLoadSingle(Opcode);
729 
730   // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
731   // base register writeback.
732   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
733   if (!Opcode)
734     return nullptr;
735 
736   // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
737   // - There is no writeback (LDM of base register),
738   // - the base register is killed by the merged instruction,
739   // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
740   //   to reset the base register.
741   // Otherwise, don't merge.
742   // It's safe to return here since the code to materialize a new base register
743   // above is also conditional on SafeToClobberCPSR.
744   if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
745     return nullptr;
746 
747   MachineInstrBuilder MIB;
748 
749   if (Writeback) {
750     assert(isThumb1 && "expected Writeback only inThumb1");
751     if (Opcode == ARM::tLDMIA) {
752       assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
753       // Update tLDMIA with writeback if necessary.
754       Opcode = ARM::tLDMIA_UPD;
755     }
756 
757     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
758 
759     // Thumb1: we might need to set base writeback when building the MI.
760     MIB.addReg(Base, getDefRegState(true))
761        .addReg(Base, getKillRegState(BaseKill));
762 
763     // The base isn't dead after a merged instruction with writeback.
764     // Insert a sub instruction after the newly formed instruction to reset.
765     if (!BaseKill)
766       UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
767 
768   } else {
769     // No writeback, simply build the MachineInstr.
770     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
771     MIB.addReg(Base, getKillRegState(BaseKill));
772   }
773 
774   MIB.addImm(Pred).addReg(PredReg);
775 
776   for (const std::pair<unsigned, bool> &R : Regs)
777     MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
778 
779   return MIB.getInstr();
780 }
781 
782 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
783     MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
784     bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
785     DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
786   bool IsLoad = isi32Load(Opcode);
787   assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
788   unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
789 
790   assert(Regs.size() == 2);
791   MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
792                                     TII->get(LoadStoreOpcode));
793   if (IsLoad) {
794     MIB.addReg(Regs[0].first, RegState::Define)
795        .addReg(Regs[1].first, RegState::Define);
796   } else {
797     MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
798        .addReg(Regs[1].first, getKillRegState(Regs[1].second));
799   }
800   MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
801   return MIB.getInstr();
802 }
803 
/// Call MergeOps and update MemOps and merges accordingly on success.
/// Builds one LDRD/STRD or LDM/STM replacing all instructions in \p Cand,
/// erases the originals, and repairs kill flags / implicit super-register
/// defs in the affected range. Returns the merged instruction, or nullptr
/// if no merge was possible.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  // Prefer the LDRD/STRD form when the candidate allows it; fall back to a
  // load/store multiple otherwise.
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;  // A later def exists; don't replicate this one.
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
920 
/// Return whether \p Offset is encodable in a t2LDRDi8/t2STRDi8 immediate.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Magnitude = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4, so only multiples of 4 up to 255 * 4 = 1020 fit.
  return Magnitude < 1024 && (Magnitude & 3) == 0;
}
927 
928 /// Return true for loads/stores that can be combined to a double/multi
929 /// operation without increasing the requirements for alignment.
930 static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
931                                  const MachineInstr &MI) {
932   // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
933   // difference.
934   unsigned Opcode = MI.getOpcode();
935   if (!isi32Load(Opcode) && !isi32Store(Opcode))
936     return true;
937 
938   // Stack pointer alignment is out of the programmers control so we can trust
939   // SP-relative loads/stores.
940   if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
941       STI.getFrameLowering()->getTransientStackAlignment() >= 4)
942     return true;
943   return false;
944 }
945 
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// Greedily extends each run of memops with contiguous offsets into one
/// MergeCandidate, recording whether it can become an LDRD/STRD pair and/or
/// an LDM/STM, and appends it to Candidates.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    // Undef operands get UINT_MAX so they can never satisfy the ascending
    // register-number checks below.
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm starting with an odd register number, as that
    // needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // Memory offsets must be contiguous to extend the chain.
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // An LDRD/STRD takes exactly two registers, so only the second memop
      // (Count <= 1) can still join the pair.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single instruction is not an actual merge.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
1046 
/// Map a non-writeback load/store-multiple opcode plus the addressing submode
/// in use to the corresponding base-updating (writeback) opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  // ARM-mode LDM/STM have updating forms for all four submodes.
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  // Thumb2 LDM/STM only have IA and DB forms.
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  // VFP load/store multiple: only IA and DB updating forms exist.
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
1113 
1114 /// Check if the given instruction increments or decrements a register and
1115 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1116 /// generated by the instruction are possibly read as well.
1117 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1118                                   ARMCC::CondCodes Pred, unsigned PredReg) {
1119   bool CheckCPSRDef;
1120   int Scale;
1121   switch (MI.getOpcode()) {
1122   case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
1123   case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
1124   case ARM::t2SUBri:
1125   case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
1126   case ARM::t2ADDri:
1127   case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
1128   case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
1129   case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
1130   default: return 0;
1131   }
1132 
1133   unsigned MIPredReg;
1134   if (MI.getOperand(0).getReg() != Reg ||
1135       MI.getOperand(1).getReg() != Reg ||
1136       getInstrPredicate(MI, MIPredReg) != Pred ||
1137       MIPredReg != PredReg)
1138     return 0;
1139 
1140   if (CheckCPSRDef && definesCPSR(&MI))
1141     return 0;
1142   return MI.getOperand(2).getImm() * Scale;
1143 }
1144 
1145 /// Searches for an increment or decrement of \p Reg before \p MBBI.
1146 static MachineBasicBlock::iterator
1147 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1148                  ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1149   Offset = 0;
1150   MachineBasicBlock &MBB = *MBBI->getParent();
1151   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1152   MachineBasicBlock::iterator EndMBBI = MBB.end();
1153   if (MBBI == BeginMBBI)
1154     return EndMBBI;
1155 
1156   // Skip debug values.
1157   MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1158   while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
1159     --PrevMBBI;
1160 
1161   Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1162   return Offset == 0 ? EndMBBI : PrevMBBI;
1163 }
1164 
1165 /// Searches for a increment or decrement of \p Reg after \p MBBI.
1166 static MachineBasicBlock::iterator
1167 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1168                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1169   Offset = 0;
1170   MachineBasicBlock &MBB = *MBBI->getParent();
1171   MachineBasicBlock::iterator EndMBBI = MBB.end();
1172   MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1173   // Skip debug values.
1174   while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1175     ++NextMBBI;
1176   if (NextMBBI == EndMBBI)
1177     return EndMBBI;
1178 
1179   Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1180   return Offset == 0 ? EndMBBI : NextMBBI;
1181 }
1182 
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // First try an add/sub before the instruction; folding it flips the
  // addressing submode (ia<->db, ib<->da).
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    // Otherwise try an add/sub after it; the submode stays the same but the
    // delta must match the transfer size in the right direction.
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  // Rebuild as the updating variant: the written-back base def comes first,
  // then the original base, predicate, and register list.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
1248 
1249 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1250                                              ARM_AM::AddrOpc Mode) {
1251   switch (Opc) {
1252   case ARM::LDRi12:
1253     return ARM::LDR_PRE_IMM;
1254   case ARM::STRi12:
1255     return ARM::STR_PRE_IMM;
1256   case ARM::VLDRS:
1257     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1258   case ARM::VLDRD:
1259     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1260   case ARM::VSTRS:
1261     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1262   case ARM::VSTRD:
1263     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1264   case ARM::t2LDRi8:
1265   case ARM::t2LDRi12:
1266     return ARM::t2LDR_PRE;
1267   case ARM::t2STRi8:
1268   case ARM::t2STRi12:
1269     return ARM::t2STR_PRE;
1270   default: llvm_unreachable("Unhandled opcode!");
1271   }
1272 }
1273 
1274 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1275                                               ARM_AM::AddrOpc Mode) {
1276   switch (Opc) {
1277   case ARM::LDRi12:
1278     return ARM::LDR_POST_IMM;
1279   case ARM::STRi12:
1280     return ARM::STR_POST_IMM;
1281   case ARM::VLDRS:
1282     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1283   case ARM::VLDRD:
1284     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1285   case ARM::VSTRS:
1286     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1287   case ARM::VSTRD:
1288     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1289   case ARM::t2LDRi8:
1290   case ARM::t2LDRi12:
1291     return ARM::t2LDR_POST;
1292   case ARM::t2STRi8:
1293   case ARM::t2STRi12:
1294     return ARM::t2STR_POST;
1295   default: llvm_unreachable("Unhandled opcode!");
1296   }
1297 }
1298 
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible, producing a pre- or
/// post-indexed access (or, for VLDR/VSTR, a one-register updating
/// load/store multiple). Returns true if a fold was performed.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero immediate offset can be replaced by the folded inc/dec.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // An inc/dec before the access becomes pre-indexed; one after becomes
  // post-indexed. AM5 (VLDR/VSTR) is limited by the !isAM5 guards to
  // decrement-before and increment-after, matching the available
  // VLDM/VSTM writeback forms.
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Post-indexed forms carry an am2offset operand (offset reg + imm).
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
1404 
/// Fold a preceding/trailing inc/dec of the base register into a t2LDRDi8 or
/// t2STRDi8, producing the pre/post-indexed t2LDRD/t2STRD form. Returns true
/// if a fold was performed.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  // Only a zero immediate offset can be replaced by the writeback amount.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  // Prefer an add/sub before the instruction (pre-indexed), else one after
  // (post-indexed). The doubleword transfer is 8 bytes.
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // Operand order differs: LDRD lists the loaded regs then the written-back
  // base; STRD lists the written-back base first.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}
1464 
1465 /// Returns true if instruction is a memory operation that this pass is capable
1466 /// of operating on.
1467 static bool isMemoryOp(const MachineInstr &MI) {
1468   unsigned Opcode = MI.getOpcode();
1469   switch (Opcode) {
1470   case ARM::VLDRS:
1471   case ARM::VSTRS:
1472   case ARM::VLDRD:
1473   case ARM::VSTRD:
1474   case ARM::LDRi12:
1475   case ARM::STRi12:
1476   case ARM::tLDRi:
1477   case ARM::tSTRi:
1478   case ARM::tLDRspi:
1479   case ARM::tSTRspi:
1480   case ARM::t2LDRi8:
1481   case ARM::t2LDRi12:
1482   case ARM::t2STRi8:
1483   case ARM::t2STRi12:
1484     break;
1485   default:
1486     return false;
1487   }
1488   if (!MI.getOperand(1).isReg())
1489     return false;
1490 
1491   // When no memory operands are present, conservatively assume unaligned,
1492   // volatile, unfoldable.
1493   if (!MI.hasOneMemOperand())
1494     return false;
1495 
1496   const MachineMemOperand &MMO = **MI.memoperands_begin();
1497 
1498   // Don't touch volatile memory accesses - we may be changing their order.
1499   if (MMO.isVolatile())
1500     return false;
1501 
1502   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1503   // not.
1504   if (MMO.getAlignment() < 4)
1505     return false;
1506 
1507   // str <undef> could probably be eliminated entirely, but for now we just want
1508   // to avoid making a mess of it.
1509   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1510   if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1511     return false;
1512 
1513   // Likewise don't mess with references to undefined addresses.
1514   if (MI.getOperand(1).isUndef())
1515     return false;
1516 
1517   return true;
1518 }
1519 
1520 static void InsertLDR_STR(MachineBasicBlock &MBB,
1521                           MachineBasicBlock::iterator &MBBI,
1522                           int Offset, bool isDef,
1523                           DebugLoc DL, unsigned NewOpc,
1524                           unsigned Reg, bool RegDeadKill, bool RegUndef,
1525                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
1526                           bool OffKill, bool OffUndef,
1527                           ARMCC::CondCodes Pred, unsigned PredReg,
1528                           const TargetInstrInfo *TII, bool isT2) {
1529   if (isDef) {
1530     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1531                                       TII->get(NewOpc))
1532       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1533       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1534     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1535   } else {
1536     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1537                                       TII->get(NewOpc))
1538       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1539       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1540     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1541   }
1542 }
1543 
/// Rewrite an LDRD/STRD whose current form cannot be used.  Two cases:
///  * Cortex-M3 errata 602117: an LDRD (ARM or Thumb2) whose first
///    destination register equals the base register may corrupt the base if
///    the instruction is interrupted or faults.
///  * ARM-mode LDRD/STRD whose registers are not an even/odd consecutive
///    pair, which the encoding requires.
/// The instruction is replaced by a single LDM/STM when the registers ascend
/// and the offset is zero, otherwise by two single loads/stores; the original
/// is erased and MBBI advanced past it.  Returns true iff a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  // t2STRDi8 is not screened here: the consecutive-register requirement below
  // only applies to ARM-mode LDRD/STRD, and the errata check is load-only.
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  // Operand layout: 0 = first transfer reg, 1 = second transfer reg, 2 = base.
  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg  = MI->getOperand(1).getReg();
  // Use Dwarf register numbers so even/odd adjacency can be tested
  // arithmetically.
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  // For a load the transfer registers are defs (dead flag matters); for a
  // store they are uses (kill flag matters).
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  // Operand 3 is a register only on the ARM-mode form (the addressing-mode
  // offset register); the Thumb2 form takes only an immediate, so there is no
  // offset-register state to carry over.
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      // Emit the OddReg load first, so the base is still intact when the
      // second load overwrites EvenReg (which overlaps the base).
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  // Delete the original LDRD/STRD; MBBI now refers to the next instruction.
  MBBI = MBB.erase(MBBI);
  return true;
}
1662 
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;          // Ops of the current chain, kept sorted by offset.
  unsigned CurrBase = 0;      // Base register of the chain (0 = no open chain).
  unsigned CurrOpc = ~0u;     // Common opcode of the chain.
  ARMCC::CondCodes CurrPred = ARMCC::AL;  // Common predicate of the chain.
  unsigned Position = 0;      // Counter; larger values are nearer the block top.
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  // Scan the block bottom-up so erasing instructions along the way is safe.
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            // Find the insertion point that keeps MemOps sorted by offset.
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
      // remember them because we may still be able to merge add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }


    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  // Flush the final chain, if any.
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // Merging failed; at least try to fold a base update into each op.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}
1814 
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    // Find the instruction that precedes the return.
    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    // Ignore any DBG_VALUE instructions.
    while (PrevI->isDebugValue() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr *PrevMI = PrevI;
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // For this fold the last operand of the LDM must be the LR pop.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Retarget the pop to load straight into PC and drop the return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), *MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1859 
1860 bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
1861   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1862   if (MBBI == MBB.begin() || MBBI == MBB.end() ||
1863       MBBI->getOpcode() != ARM::tBX_RET)
1864     return false;
1865 
1866   MachineBasicBlock::iterator Prev = MBBI;
1867   --Prev;
1868   if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
1869     return false;
1870 
1871   for (auto Use : Prev->uses())
1872     if (Use.isKill()) {
1873       AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
1874                          .addReg(Use.getReg(), RegState::Kill))
1875           .copyImplicitOps(*MBBI);
1876       MBB.erase(MBBI);
1877       MBB.erase(Prev);
1878       return true;
1879     }
1880 
1881   llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
1882 }
1883 
1884 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1885   MF = &Fn;
1886   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1887   TL = STI->getTargetLowering();
1888   AFI = Fn.getInfo<ARMFunctionInfo>();
1889   TII = STI->getInstrInfo();
1890   TRI = STI->getRegisterInfo();
1891 
1892   RegClassInfoValid = false;
1893   isThumb2 = AFI->isThumb2Function();
1894   isThumb1 = AFI->isThumbFunction() && !isThumb2;
1895 
1896   bool Modified = false;
1897   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1898        ++MFI) {
1899     MachineBasicBlock &MBB = *MFI;
1900     Modified |= LoadStoreMultipleOpti(MBB);
1901     if (STI->hasV5TOps())
1902       Modified |= MergeReturnIntoLDM(MBB);
1903     if (isThumb1)
1904       Modified |= CombineMovBx(MBB);
1905   }
1906 
1907   Allocator.DestroyAll();
1908   return Modified;
1909 }
1910 
namespace llvm {
// Forward declaration of the initializer generated by the INITIALIZE_PASS
// macro below, so the constructor can reference it.
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}

// Display name of the pre-RA load/store optimization pass.
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
  "ARM pre- register allocation load / store optimization pass"
1917 
1918 namespace {
1919   /// Pre- register allocation pass that move load / stores from consecutive
1920   /// locations close to make it more likely they will be combined later.
1921   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1922     static char ID;
1923     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
1924       initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
1925     }
1926 
1927     const DataLayout *TD;
1928     const TargetInstrInfo *TII;
1929     const TargetRegisterInfo *TRI;
1930     const ARMSubtarget *STI;
1931     MachineRegisterInfo *MRI;
1932     MachineFunction *MF;
1933 
1934     bool runOnMachineFunction(MachineFunction &Fn) override;
1935 
1936     const char *getPassName() const override {
1937       return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
1938     }
1939 
1940   private:
1941     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1942                           unsigned &NewOpc, unsigned &EvenReg,
1943                           unsigned &OddReg, unsigned &BaseReg,
1944                           int &Offset,
1945                           unsigned &PredReg, ARMCC::CondCodes &Pred,
1946                           bool &isT2);
1947     bool RescheduleOps(MachineBasicBlock *MBB,
1948                        SmallVectorImpl<MachineInstr *> &Ops,
1949                        unsigned Base, bool isLd,
1950                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1951     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1952   };
1953   char ARMPreAllocLoadStoreOpt::ID = 0;
1954 }
1955 
// Register the pre-RA pass (and its initializer) with LLVM's pass registry.
INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
1958 
1959 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1960   if (AssumeMisalignedLoadStores)
1961     return false;
1962 
1963   TD = &Fn.getDataLayout();
1964   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1965   TII = STI->getInstrInfo();
1966   TRI = STI->getRegisterInfo();
1967   MRI = &Fn.getRegInfo();
1968   MF  = &Fn;
1969 
1970   bool Modified = false;
1971   for (MachineBasicBlock &MFI : Fn)
1972     Modified |= RescheduleLoadStoreInstrs(&MFI);
1973 
1974   return Modified;
1975 }
1976 
1977 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1978                                       MachineBasicBlock::iterator I,
1979                                       MachineBasicBlock::iterator E,
1980                                       SmallPtrSetImpl<MachineInstr*> &MemOps,
1981                                       SmallSet<unsigned, 4> &MemRegs,
1982                                       const TargetRegisterInfo *TRI) {
1983   // Are there stores / loads / calls between them?
1984   // FIXME: This is overly conservative. We should make use of alias information
1985   // some day.
1986   SmallSet<unsigned, 4> AddedRegPressure;
1987   while (++I != E) {
1988     if (I->isDebugValue() || MemOps.count(&*I))
1989       continue;
1990     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1991       return false;
1992     if (isLd && I->mayStore())
1993       return false;
1994     if (!isLd) {
1995       if (I->mayLoad())
1996         return false;
1997       // It's not safe to move the first 'str' down.
1998       // str r1, [r0]
1999       // strh r5, [r0]
2000       // str r4, [r0, #+4]
2001       if (I->mayStore())
2002         return false;
2003     }
2004     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
2005       MachineOperand &MO = I->getOperand(j);
2006       if (!MO.isReg())
2007         continue;
2008       unsigned Reg = MO.getReg();
2009       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
2010         return false;
2011       if (Reg != Base && !MemRegs.count(Reg))
2012         AddedRegPressure.insert(Reg);
2013     }
2014   }
2015 
2016   // Estimate register pressure increase due to the transformation.
2017   if (MemRegs.size() <= 4)
2018     // Ok if we are moving small number of instructions.
2019     return true;
2020   return AddedRegPressure.size() <= MemRegs.size() * 2;
2021 }
2022 
/// Decide whether Op0 and Op1 (two loads or two stores) can be merged into a
/// single LDRD/STRD.  On success, fills in the new opcode, the two transfer
/// registers, the base register, the encoded immediate offset, the predicate
/// and whether the result is a Thumb2 instruction, and returns true.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Pick the paired opcode from the single-op opcode of Op0.
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // Thumb2: signed, Scale-aligned immediate within +/- (1<<8)*Scale.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM mode (addrmode3): encode the magnitude together with an add/sub
    // flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  // Reject identical transfer registers.
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(*Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
2097 
/// Try to move the loads / stores in Ops, which all access the same base
/// register Base, next to each other so later passes can combine them; pairs
/// may be merged into LDRD/STRD right here.  MI2LocMap gives each
/// instruction's original position in the block.  Returns true if anything
/// was moved or merged.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;       // Smallest original position in this run.
    unsigned LastLoc = 0;          // Largest original position in this run.
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops from the back (lowest offset first) and grow a run of
    // same-kind, same-width, gap-free memory ops.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Stop on an offset gap or a transfer-width change.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Constrain both registers to classes valid for the paired opcode.
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The pair replaces both originals.
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg,  ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pair formed: just cluster the run at the insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
2258 
/// Scan MBB for loads/stores that can be rescheduled.  The block is processed
/// in regions delimited by calls/terminators (and by a repeated base+offset
/// pair); within a region, unpredicated memory ops are bucketed by base
/// register, then each bucket with more than one op is handed to
/// RescheduleOps.  Returns true if anything changed.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;  // Instruction -> position.
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;  // Load bases in first-seen order.
  SmallVector<unsigned, 4> StBases;  // Store bases in first-seen order.

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    // Collect one region's worth of loads and stores.
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(*MI))
        continue;
      // Only unpredicated memory ops are considered.
      unsigned PredReg = 0;
      if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A repeated base+offset pair ends the region (see StopHere below).
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Reset the per-region state before processing the rest of the block.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
2364 
2365 
2366 /// Returns an instance of the load / store optimization pass.
2367 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2368   if (PreAlloc)
2369     return new ARMPreAllocLoadStoreOpt();
2370   return new ARMLoadStoreOpt();
2371 }
2372 
2373