//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
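/// For example (register choices illustrative):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}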
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. when merging
    // more than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
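/// E.g., word loads of r2, r3, r4 at offsets 0, 4, 8 off the same base
/// (registers illustrative) form one run and are handed to MergeOpsUpdate
/// to become a single ldmia; a break in the run splits it into several
/// multiples.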
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

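/// isMatchingDecrement - Return true if MI subtracts Bytes from Base and
/// writes the result back to Base (e.g. "sub r4, r4, #8" for Base = r4,
/// Bytes = 8) under the same predicate. A non-zero Limit additionally bounds
/// the immediate. isMatchingIncrement below is the add counterpart.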
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

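/// getLSMultipleTransferSize - Return the number of bytes transferred by the
/// given load / store or load / store multiple instruction.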
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base);  // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
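///
/// For example (registers and offsets illustrative):
///
/// ldr r3, [r0]          ldr r3, [r0], #4
/// r0 := r0 + 4     =>
///
/// r0 := r0 - 4          ldr r3, [r0, #-4]!
/// ldr r3, [r0]     =>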
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

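/// getMemoryOpOffset - Return the signed byte offset encoded in the given
/// load / store's addressing-mode immediate operand.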
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

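/// InsertLDR_STR - Emit a single load / store with the given offset, register
/// flags, and predicate; used by FixInvalidRegPairOp below to split an
/// ldrd / strd into two ordinary loads / stores.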
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

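/// FixInvalidRegPairOp - An ldrd / strd needs its two registers to form an
/// even / odd pair (e.g. r0/r1). If the pair is invalid, rewrite the
/// instruction as an ldm / stm (ascending registers, zero offset) or as two
/// separate loads / stores.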
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // redefined by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
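///
/// For example (registers illustrative), interleaved loads off two bases
///   ldr r0, [r4]; ldr r2, [r5]; ldr r1, [r4, #4]; ldr r3, [r5, #4]
/// are rescheduled so the loads off each base become adjacent, giving the
/// post-RA pass above a chance to turn them into ldm's or ldrd's.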

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre-register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

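/// CanFormLdStDWord - Return true if Op0 and Op1, two word loads / stores of
/// consecutive addresses off the same base, can be combined into a single
/// ldrd / strd (e.g. two t2LDRi12's into a t2LDRDi8), computing the new
/// opcode and operands. Alignment and offset-range checks follow below.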
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase in register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;
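        // Note: the scan above advances the insertion point past any of the
        // instructions being rescheduled, so the splice / erase calls below
        // cannot invalidate it.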

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate two registers that can form an even / odd
        // register pair for an ldrd / strd.
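        // Illustrative sketch: a candidate pair of loads such as
        //   ldr r0, [r2, #4]
        //   ldr r1, [r2, #8]
        // can become the single instruction
        //   ldrd r0, r1, [r2, #4]
        // when the offsets are consecutive and r0 / r1 end up even / odd.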
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
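          // The hints matter because ARM-mode ldrd / strd need Rt in an even
          // register with Rt2 == Rt + 1. They are advisory only; if the
          // allocator fails to pair the registers, a later fixup splits the
          // ldrd / strd back apart (see the NumLDRD2LDR / NumSTRD2STR
          // statistics).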
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;
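      // Only unconditionally executed (AL) memory ops are rescheduled;
      // moving predicated loads / stores would require preserving their
      // condition-flag dependencies, so they are conservatively skipped.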

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination already seen).
        // Backtrack.
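        // E.g. a second access to [Base, #0] must not be reordered across
        // the first, so cut the window here and reschedule what has been
        // collected before scanning resumes.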
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}