1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "arm-ldst-opt"
16 #include "ARM.h"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/SmallPtrSet.h"
38 #include "llvm/ADT/SmallSet.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/Statistic.h"
41 using namespace llvm;
42 
43 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44 STATISTIC(NumSTMGened , "Number of stm instructions generated");
45 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
46 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
47 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50 STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
51 STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
52 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
53 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
54 
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
57 
namespace {
  /// ARMLoadStoreOpt - Machine function pass that merges adjacent ARM /
  /// Thumb2 loads and stores (and nearby base-register updates) into
  /// load / store multiple instructions. Runs after register allocation.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks / analyses, set up in runOnMachineFunction.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;   // Used to find a scratch register for a new base.
    bool isThumb2;      // True when optimizing Thumb2 code.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One load / store candidate awaiting merging.
    struct MemOpQueueEntry {
      int Offset;                       // Immediate offset from the base reg.
      unsigned Reg;                     // Register loaded or stored.
      bool isKill;                      // True if Reg is killed by this memop.
      unsigned Position;                // Ordinal position within the block.
      MachineBasicBlock::iterator MBBI; // The load / store instruction.
      bool Merged;                      // Set once folded into a multiple.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Build a single LDM/STM/VLDM/VSTM from the registers in Regs.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // Call MergeOps and, on success, update MemOps/Merges bookkeeping and
    // kill flags.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // Greedily merge the queued memops starting at SIndex into one or more
    // load / store multiples.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    // Advance the register scavenger past the queued memops.
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    // Rewrite an LDRD/STRD with an illegal register pair (see the
    // NumLDRD2LDM / NumLDRD2LDR statistics above for the forms produced).
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    // Fold a preceding / trailing base inc/dec into a single load / store.
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    // Fold a preceding / trailing base inc/dec into a ld/st multiple.
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    // Per-basic-block driver for the merging optimization.
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    // Fold a function return into a preceding LDM when possible.
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
131 
/// getLoadStoreMultipleOpcode - Map a single load / store opcode plus an
/// addressing submode to the corresponding load / store multiple opcode.
/// Returns 0 for combinations that only exist as base-updating (writeback)
/// instructions (the VFP db forms). As a side effect, bumps the matching
/// "instructions generated" statistics counter.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  // Thumb2 multiples only have IA and DB forms.
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0; // Not reached; every case above returns or is unreachable.
}
209 
namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode (ia, ib, da
/// or db) encoded in a load / store multiple opcode, including the writeback
/// (_UPD) and return (_RET) variants.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  // Note: the VFP db forms only exist as writeback instructions.
  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode; // Not reached; switch covers all opcodes.
}

  } // end namespace ARM_AM
} // end namespace llvm
268 
269 static bool isT2i32Load(unsigned Opc) {
270   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271 }
272 
273 static bool isi32Load(unsigned Opc) {
274   return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275 }
276 
277 static bool isT2i32Store(unsigned Opc) {
278   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279 }
280 
281 static bool isi32Store(unsigned Opc) {
282   return Opc == ARM::STRi12 || isT2i32Store(Opc);
283 }
284 
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the addressing submode that makes the first transfer land exactly
  // at Base + Offset without any extra address arithmetic.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    // First transfer at Base+4: increment-before.
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    // Last transfer at Base: decrement-after.
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The offset must be encodable as a shifter/modified immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right at its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  // Emit the multiple: base, predicate, then the register list with the
  // def/kill state carried over from the individual memops.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
364 
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  // Scan every queued memop OUTSIDE the [memOpsBegin, memOpsEnd) range being
  // merged; the i == memOpsBegin check jumps the index over that range.
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    // A memop before the insertion point that kills its register: the merged
    // instruction will become the new killer of that register.
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge. The merged instruction is inserted immediately
  // after memOps[insertAfter].
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        // Clear the kill flag on the earlier use; the merged instruction
        // now kills this register.
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
434 
435 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
436 /// load / store multiple instructions.
437 void
438 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
439                           unsigned Base, int Opcode, unsigned Size,
440                           ARMCC::CondCodes Pred, unsigned PredReg,
441                           unsigned Scratch, MemOpQueue &MemOps,
442                           SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
443   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
444   int Offset = MemOps[SIndex].Offset;
445   int SOffset = Offset;
446   unsigned insertAfter = SIndex;
447   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
448   DebugLoc dl = Loc->getDebugLoc();
449   const MachineOperand &PMO = Loc->getOperand(0);
450   unsigned PReg = PMO.getReg();
451   unsigned PRegNum = PMO.isUndef() ? UINT_MAX
452     : getARMRegisterNumbering(PReg);
453   unsigned Count = 1;
454   unsigned Limit = ~0U;
455 
456   // vldm / vstm limit are 32 for S variants, 16 for D variants.
457 
458   switch (Opcode) {
459   default: break;
460   case ARM::VSTRS:
461     Limit = 32;
462     break;
463   case ARM::VSTRD:
464     Limit = 16;
465     break;
466   case ARM::VLDRD:
467     Limit = 16;
468     break;
469   case ARM::VLDRS:
470     Limit = 32;
471     break;
472   }
473 
474   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
475     int NewOffset = MemOps[i].Offset;
476     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
477     unsigned Reg = MO.getReg();
478     unsigned RegNum = MO.isUndef() ? UINT_MAX
479       : getARMRegisterNumbering(Reg);
480     // Register numbers must be in ascending order. For VFP / NEON load and
481     // store multiples, the registers must also be consecutive and within the
482     // limit on the number of registers per instruction.
483     if (Reg != ARM::SP &&
484         NewOffset == Offset + (int)Size &&
485         ((isNotVFP && RegNum > PRegNum) ||
486          ((Count < Limit) && RegNum == PRegNum+1))) {
487       Offset += Size;
488       PRegNum = RegNum;
489       ++Count;
490     } else {
491       // Can't merge this in. Try merge the earlier ones first.
492       MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
493                      Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
494       MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
495                    MemOps, Merges);
496       return;
497     }
498 
499     if (MemOps[i].Position > MemOps[insertAfter].Position)
500       insertAfter = i;
501   }
502 
503   bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
504   MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
505                  Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
506   return;
507 }
508 
509 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
510                                        unsigned Bytes, unsigned Limit,
511                                        ARMCC::CondCodes Pred, unsigned PredReg){
512   unsigned MyPredReg = 0;
513   if (!MI)
514     return false;
515   if (MI->getOpcode() != ARM::t2SUBri &&
516       MI->getOpcode() != ARM::tSUBspi &&
517       MI->getOpcode() != ARM::SUBri)
518     return false;
519 
520   // Make sure the offset fits in 8 bits.
521   if (Bytes == 0 || (Limit && Bytes >= Limit))
522     return false;
523 
524   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
525   return (MI->getOperand(0).getReg() == Base &&
526           MI->getOperand(1).getReg() == Base &&
527           (MI->getOperand(2).getImm()*Scale) == Bytes &&
528           llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
529           MyPredReg == PredReg);
530 }
531 
532 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
533                                        unsigned Bytes, unsigned Limit,
534                                        ARMCC::CondCodes Pred, unsigned PredReg){
535   unsigned MyPredReg = 0;
536   if (!MI)
537     return false;
538   if (MI->getOpcode() != ARM::t2ADDri &&
539       MI->getOpcode() != ARM::tADDspi &&
540       MI->getOpcode() != ARM::ADDri)
541     return false;
542 
543   if (Bytes == 0 || (Limit && Bytes >= Limit))
544     // Make sure the offset fits in 8 bits.
545     return false;
546 
547   unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
548   return (MI->getOperand(0).getReg() == Base &&
549           MI->getOperand(1).getReg() == Base &&
550           (MI->getOperand(2).getImm()*Scale) == Bytes &&
551           llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
552           MyPredReg == PredReg);
553 }
554 
/// getLSMultipleTransferSize - Return the total number of bytes transferred
/// by MI, or 0 if MI is not a load / store opcode this pass understands.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  // Single 32-bit transfers.
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  // Single 64-bit transfers.
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  // Multiples: the register list is variadic, so its length is derived from
  // the difference between the actual and described operand counts.
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
590 
/// getUpdatingLSMultipleOpcode - Map a non-writeback load / store multiple
/// opcode plus an addressing submode to the corresponding base-updating
/// (writeback, _UPD) opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  // Thumb2 multiples only have IA and DB forms.
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  // VFP multiples likewise only have IA and DB forms.
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0; // Not reached; every case above returns or is unreachable.
}
667 
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction. A decrement before an ia/ib
  // multiple is equivalent to a db/da multiple with writeback.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    // Skip over debug values so they don't block the fold.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction. A following inc/dec matching the
  // transfer direction becomes the writeback; the submode stays the same.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we are about to erase the caller's scan iterator, advance it
      // first so it stays valid.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Rebuild the multiple as its writeback form: def of the updated base,
  // the base itself, the predicate, then the original register list.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
762 
763 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
764                                              ARM_AM::AddrOpc Mode) {
765   switch (Opc) {
766   case ARM::LDRi12:
767     return ARM::LDR_PRE;
768   case ARM::STRi12:
769     return ARM::STR_PRE_IMM;
770   case ARM::VLDRS:
771     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772   case ARM::VLDRD:
773     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774   case ARM::VSTRS:
775     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776   case ARM::VSTRD:
777     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
778   case ARM::t2LDRi8:
779   case ARM::t2LDRi12:
780     return ARM::t2LDR_PRE;
781   case ARM::t2STRi8:
782   case ARM::t2STRi12:
783     return ARM::t2STR_PRE;
784   default: llvm_unreachable("Unhandled opcode!");
785   }
786   return 0;
787 }
788 
789 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
790                                               ARM_AM::AddrOpc Mode) {
791   switch (Opc) {
792   case ARM::LDRi12:
793     return ARM::LDR_POST_IMM;
794   case ARM::STRi12:
795     return ARM::STR_POST_IMM;
796   case ARM::VLDRS:
797     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
798   case ARM::VLDRD:
799     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
800   case ARM::VSTRS:
801     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
802   case ARM::VSTRD:
803     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
804   case ARM::t2LDRi8:
805   case ARM::t2LDRi12:
806     return ARM::t2LDR_POST;
807   case ARM::t2STRi8:
808   case ARM::t2STRi12:
809     return ARM::t2STR_POST;
810   default: llvm_unreachable("Unhandled opcode!");
811   }
812   return 0;
813 }
814 
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  // All opcodes handled below keep the base address register in operand 1.
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  // AM5 = VFP single/double load/store; AM2 = ARM-mode immediate LDR/STR.
  // The remaining supported opcodes are the Thumb2 i8/i12 forms.
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero-offset access can absorb a neighboring base inc/dec.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  // (AM5 gets limit 0: only an update of exactly the transfer size folds.)
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction: a preceding SUB of the
  // transfer size becomes a pre-decrement, a preceding ADD a pre-increment
  // (the latter is not available for AM5).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction: a following ADD/SUB of the
  // transfer size becomes a post-indexed access.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // If we're about to erase the instruction the caller's iterator I
      // refers to, advance I past it and report that via Advance.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  // FIXME: Loads still use a combined reg/imm offset operand. When
  // AM2 refactoring is complete, this can go away and just always use
  // the raw Offset value.
  if (isAM2 && isLd)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  // The original single load/store has been replaced by the indexed form.
  MBB.erase(MBBI);

  return true;
}
948 
949 /// isMemoryOp - Returns true if instruction is a memory operation that this
950 /// pass is capable of operating on.
951 static bool isMemoryOp(const MachineInstr *MI) {
952   // When no memory operands are present, conservatively assume unaligned,
953   // volatile, unfoldable.
954   if (!MI->hasOneMemOperand())
955     return false;
956 
957   const MachineMemOperand *MMO = *MI->memoperands_begin();
958 
959   // Don't touch volatile memory accesses - we may be changing their order.
960   if (MMO->isVolatile())
961     return false;
962 
963   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
964   // not.
965   if (MMO->getAlignment() < 4)
966     return false;
967 
968   // str <undef> could probably be eliminated entirely, but for now we just want
969   // to avoid making a mess of it.
970   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
971   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
972       MI->getOperand(0).isUndef())
973     return false;
974 
975   // Likewise don't mess with references to undefined addresses.
976   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
977       MI->getOperand(1).isUndef())
978     return false;
979 
980   int Opcode = MI->getOpcode();
981   switch (Opcode) {
982   default: break;
983   case ARM::VLDRS:
984   case ARM::VSTRS:
985     return MI->getOperand(1).isReg();
986   case ARM::VLDRD:
987   case ARM::VSTRD:
988     return MI->getOperand(1).isReg();
989   case ARM::LDRi12:
990   case ARM::STRi12:
991   case ARM::t2LDRi8:
992   case ARM::t2LDRi12:
993   case ARM::t2STRi8:
994   case ARM::t2STRi12:
995     return MI->getOperand(1).isReg();
996   }
997   return false;
998 }
999 
1000 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1001 /// op that is being merged.
1002 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1003   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1004   unsigned Position = MemOps[0].Position;
1005   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1006     if (MemOps[i].Position < Position) {
1007       Position = MemOps[i].Position;
1008       Loc = MemOps[i].MBBI;
1009     }
1010   }
1011 
1012   if (Loc != MBB.begin())
1013     RS->forward(prior(Loc));
1014 }
1015 
1016 static int getMemoryOpOffset(const MachineInstr *MI) {
1017   int Opcode = MI->getOpcode();
1018   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1019   unsigned NumOperands = MI->getDesc().getNumOperands();
1020   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1021 
1022   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1023       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1024       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1025       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1026     return OffField;
1027 
1028   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1029     : ARM_AM::getAM5Offset(OffField) * 4;
1030   if (isAM3) {
1031     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1032       Offset = -Offset;
1033   } else {
1034     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1035       Offset = -Offset;
1036   }
1037   return Offset;
1038 }
1039 
1040 static void InsertLDR_STR(MachineBasicBlock &MBB,
1041                           MachineBasicBlock::iterator &MBBI,
1042                           int Offset, bool isDef,
1043                           DebugLoc dl, unsigned NewOpc,
1044                           unsigned Reg, bool RegDeadKill, bool RegUndef,
1045                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
1046                           bool OffKill, bool OffUndef,
1047                           ARMCC::CondCodes Pred, unsigned PredReg,
1048                           const TargetInstrInfo *TII, bool isT2) {
1049   if (isDef) {
1050     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1051                                       TII->get(NewOpc))
1052       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1053       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1054     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1055   } else {
1056     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1057                                       TII->get(NewOpc))
1058       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1059       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1060     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1061   }
1062 }
1063 
/// FixInvalidRegPairOp - LDRD/STRD require an even/odd consecutive register
/// pair. When MBBI is such an instruction whose registers do not form a legal
/// pair, rewrite it either as an LDM/STM (ascending registers, zero offset)
/// or as two single loads/stores. Returns true, with MBBI repositioned at the
/// first replacement instruction, if a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // An even register followed by the next odd one is already legal.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the pair registers are defs (dead flag matters); for stores
    // they are uses (kill flag matters).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Only the ARM-mode form carries a separate offset register (operand 3).
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (higher-address) load first so the base is still
        // valid when the even load overwrites it.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original pair op and point the caller at the replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1171 
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State of the chain of mergeable memory ops collected so far: all entries
  // in MemOps share this base register, opcode, transfer size and predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // First rewrite illegal LDRD/STRD register pairs; the replacement
    // instructions may themselves be merge candidates, so re-examine them.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Keep the queue sorted by offset: insert just before the first
            // entry with a larger offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Debug values neither extend nor break a chain; just step over them.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      // This instruction ended the chain; flush what has been collected.
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state; the next memory op starts a fresh chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1334 
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  // NOTE(review): if the (non-empty) block contains only debug values,
  // getLastNonDebugInstr() may return end(); the dereference below assumes a
  // real last instruction exists -- confirm this cannot happen here.
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    // Only a base-updating multiple load (a pop) can absorb the return.
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR must be the last register loaded for the rewrite to be valid.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite the pop to load PC instead of LR, keep the return's implicit
      // operands (e.g. return-value uses), and drop the redundant return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1373 
1374 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1375   const TargetMachine &TM = Fn.getTarget();
1376   AFI = Fn.getInfo<ARMFunctionInfo>();
1377   TII = TM.getInstrInfo();
1378   TRI = TM.getRegisterInfo();
1379   RS = new RegScavenger();
1380   isThumb2 = AFI->isThumb2Function();
1381 
1382   bool Modified = false;
1383   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1384        ++MFI) {
1385     MachineBasicBlock &MBB = *MFI;
1386     Modified |= LoadStoreMultipleOpti(MBB);
1387     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1388       Modified |= MergeReturnIntoLDM(MBB);
1389   }
1390 
1391   delete RS;
1392   return Modified;
1393 }
1394 
1395 
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it
/// more likely they will be combined later.
1399 
namespace {
  /// ARMPreAllocLoadStoreOpt - Pre-RA machine-function pass that reschedules
  /// loads/stores of the same base register so later passes can merge them.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks, initialized in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Whether Op0/Op1 can become a single LDRD/STRD;
    // fills the out-params describing the combined instruction on success.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move the queued same-base ops close together.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-basic-block driver.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1433 
1434 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1435   TD  = Fn.getTarget().getTargetData();
1436   TII = Fn.getTarget().getInstrInfo();
1437   TRI = Fn.getTarget().getRegisterInfo();
1438   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1439   MRI = &Fn.getRegInfo();
1440   MF  = &Fn;
1441 
1442   bool Modified = false;
1443   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1444        ++MFI)
1445     Modified |= RescheduleLoadStoreInstrs(MFI);
1446 
1447   return Modified;
1448 }
1449 
1450 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1451                                       MachineBasicBlock::iterator I,
1452                                       MachineBasicBlock::iterator E,
1453                                       SmallPtrSet<MachineInstr*, 4> &MemOps,
1454                                       SmallSet<unsigned, 4> &MemRegs,
1455                                       const TargetRegisterInfo *TRI) {
1456   // Are there stores / loads / calls between them?
1457   // FIXME: This is overly conservative. We should make use of alias information
1458   // some day.
1459   SmallSet<unsigned, 4> AddedRegPressure;
1460   while (++I != E) {
1461     if (I->isDebugValue() || MemOps.count(&*I))
1462       continue;
1463     const MCInstrDesc &MCID = I->getDesc();
1464     if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
1465       return false;
1466     if (isLd && MCID.mayStore())
1467       return false;
1468     if (!isLd) {
1469       if (MCID.mayLoad())
1470         return false;
1471       // It's not safe to move the first 'str' down.
1472       // str r1, [r0]
1473       // strh r5, [r0]
1474       // str r4, [r0, #+4]
1475       if (MCID.mayStore())
1476         return false;
1477     }
1478     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1479       MachineOperand &MO = I->getOperand(j);
1480       if (!MO.isReg())
1481         continue;
1482       unsigned Reg = MO.getReg();
1483       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1484         return false;
1485       if (Reg != Base && !MemRegs.count(Reg))
1486         AddedRegPressure.insert(Reg);
1487     }
1488   }
1489 
1490   // Estimate register pressure increase due to the transformation.
1491   if (MemRegs.size() <= 4)
1492     // Ok if we are moving small number of instructions.
1493     return true;
1494   return AddedRegPressure.size() <= MemRegs.size() * 2;
1495 }
1496 
/// CanFormLdStDWord - Check whether the adjacent loads/stores Op0/Op1 can be
/// combined into a single LDRD/STRD (or t2LDRDi8/t2STRDi8). On success the
/// out-params receive the new opcode, the two data registers, the base
/// register, the encoded offset, the predicate and whether the result is a
/// Thumb2 instruction.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // (A known, non-volatile IR value is needed to reason about alignment.)
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // Thumb2 encodes a signed 8-bit field scaled by 4.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM AM3 encodes an unsigned 8-bit magnitude plus an add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct data registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1568 
1569 namespace {
1570   struct OffsetCompare {
1571     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1572       int LOffset = getMemoryOpOffset(LHS);
1573       int ROffset = getMemoryOpOffset(RHS);
1574       assert(LHS == RHS || LOffset != ROffset);
1575       return LOffset > ROffset;
1576     }
1577   };
1578 }
1579 
1580 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1581                                  SmallVector<MachineInstr*, 4> &Ops,
1582                                  unsigned Base, bool isLd,
1583                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1584   bool RetVal = false;
1585 
1586   // Sort by offset (in reverse order).
1587   std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1588 
1589   // The loads / stores of the same base are in order. Scan them from first to
1590   // last and check for the following:
1591   // 1. Any def of base.
1592   // 2. Any gaps.
1593   while (Ops.size() > 1) {
1594     unsigned FirstLoc = ~0U;
1595     unsigned LastLoc = 0;
1596     MachineInstr *FirstOp = 0;
1597     MachineInstr *LastOp = 0;
1598     int LastOffset = 0;
1599     unsigned LastOpcode = 0;
1600     unsigned LastBytes = 0;
1601     unsigned NumMove = 0;
1602     for (int i = Ops.size() - 1; i >= 0; --i) {
1603       MachineInstr *Op = Ops[i];
1604       unsigned Loc = MI2LocMap[Op];
1605       if (Loc <= FirstLoc) {
1606         FirstLoc = Loc;
1607         FirstOp = Op;
1608       }
1609       if (Loc >= LastLoc) {
1610         LastLoc = Loc;
1611         LastOp = Op;
1612       }
1613 
1614       unsigned Opcode = Op->getOpcode();
1615       if (LastOpcode && Opcode != LastOpcode)
1616         break;
1617 
1618       int Offset = getMemoryOpOffset(Op);
1619       unsigned Bytes = getLSMultipleTransferSize(Op);
1620       if (LastBytes) {
1621         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1622           break;
1623       }
1624       LastOffset = Offset;
1625       LastBytes = Bytes;
1626       LastOpcode = Opcode;
1627       if (++NumMove == 8) // FIXME: Tune this limit.
1628         break;
1629     }
1630 
1631     if (NumMove <= 1)
1632       Ops.pop_back();
1633     else {
1634       SmallPtrSet<MachineInstr*, 4> MemOps;
1635       SmallSet<unsigned, 4> MemRegs;
1636       for (int i = NumMove-1; i >= 0; --i) {
1637         MemOps.insert(Ops[i]);
1638         MemRegs.insert(Ops[i]->getOperand(0).getReg());
1639       }
1640 
1641       // Be conservative, if the instructions are too far apart, don't
1642       // move them. We want to limit the increase of register pressure.
1643       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1644       if (DoMove)
1645         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1646                                            MemOps, MemRegs, TRI);
1647       if (!DoMove) {
1648         for (unsigned i = 0; i != NumMove; ++i)
1649           Ops.pop_back();
1650       } else {
1651         // This is the new location for the loads / stores.
1652         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1653         while (InsertPos != MBB->end()
1654                && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1655           ++InsertPos;
1656 
1657         // If we are moving a pair of loads / stores, see if it makes sense
1658         // to try to allocate a pair of registers that can form register pairs.
1659         MachineInstr *Op0 = Ops.back();
1660         MachineInstr *Op1 = Ops[Ops.size()-2];
1661         unsigned EvenReg = 0, OddReg = 0;
1662         unsigned BaseReg = 0, PredReg = 0;
1663         ARMCC::CondCodes Pred = ARMCC::AL;
1664         bool isT2 = false;
1665         unsigned NewOpc = 0;
1666         int Offset = 0;
1667         DebugLoc dl;
1668         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1669                                              EvenReg, OddReg, BaseReg,
1670                                              Offset, PredReg, Pred, isT2)) {
1671           Ops.pop_back();
1672           Ops.pop_back();
1673 
1674           const MCInstrDesc &MCID = TII->get(NewOpc);
1675           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1676           MRI->constrainRegClass(EvenReg, TRC);
1677           MRI->constrainRegClass(OddReg, TRC);
1678 
1679           // Form the pair instruction.
1680           if (isLd) {
1681             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1682               .addReg(EvenReg, RegState::Define)
1683               .addReg(OddReg, RegState::Define)
1684               .addReg(BaseReg);
1685             // FIXME: We're converting from LDRi12 to an insn that still
1686             // uses addrmode2, so we need an explicit offset reg. It should
1687             // always by reg0 since we're transforming LDRi12s.
1688             if (!isT2)
1689               MIB.addReg(0);
1690             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1691             ++NumLDRDFormed;
1692           } else {
1693             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1694               .addReg(EvenReg)
1695               .addReg(OddReg)
1696               .addReg(BaseReg);
1697             // FIXME: We're converting from LDRi12 to an insn that still
1698             // uses addrmode2, so we need an explicit offset reg. It should
1699             // always by reg0 since we're transforming STRi12s.
1700             if (!isT2)
1701               MIB.addReg(0);
1702             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1703             ++NumSTRDFormed;
1704           }
1705           MBB->erase(Op0);
1706           MBB->erase(Op1);
1707 
1708           // Add register allocation hints to form register pairs.
1709           MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1710           MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1711         } else {
1712           for (unsigned i = 0; i != NumMove; ++i) {
1713             MachineInstr *Op = Ops.back();
1714             Ops.pop_back();
1715             MBB->splice(InsertPos, MBB, Op);
1716           }
1717         }
1718 
1719         NumLdStMoved += NumMove;
1720         RetVal = true;
1721       }
1722     }
1723   }
1724 
1725   return RetVal;
1726 }
1727 
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // Maps each (non-debug) instruction to a monotonically increasing location
  // number within the current region; RescheduleOps uses these to measure
  // distance between memory ops and to find the first / last op of a group.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  // Buckets of load (resp. store) instructions keyed by base register.
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  // Base registers in first-seen order, so rescheduling is deterministic.
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  // Outer loop: process the block one "region" at a time. A region ends at a
  // call / terminator (a scheduling barrier) or when a duplicate base+offset
  // pair is encountered.
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      // Debug values are transparent: they get no location number so they
      // don't inflate the distance between real instructions.
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      // Only unconditional (AL-predicated) memory ops are candidates.
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        // Append the load to the bucket for its base register, unless an
        // earlier load in this region already uses the same base+offset.
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        // Same bookkeeping for stores.
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        // Undo the location number this instruction was assigned above; it
        // will be re-numbered when the next region re-visits it.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // More of the block remains: reset the per-region buckets before scanning
    // the next region. (When MBBI == E the loop exits, so no reset is needed.)
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
1838 
1839 
1840 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1841 /// optimization pass.
1842 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1843   if (PreAlloc)
1844     return new ARMPreAllocLoadStoreOpt();
1845   return new ARMLoadStoreOpt();
1846 }
1847