182dd6ac3SSilviu Baranga //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
282dd6ac3SSilviu Baranga //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
682dd6ac3SSilviu Baranga //
782dd6ac3SSilviu Baranga //===----------------------------------------------------------------------===//
882dd6ac3SSilviu Baranga //
982dd6ac3SSilviu Baranga // The Cortex-A15 processor employs a tracking scheme in its register renaming
1082dd6ac3SSilviu Baranga // in order to process each instruction's micro-ops speculatively and
1182dd6ac3SSilviu Baranga // out-of-order with appropriate forwarding. The ARM architecture allows VFP
1282dd6ac3SSilviu Baranga // instructions to read and write 32-bit S-registers.  Each S-register
1382dd6ac3SSilviu Baranga // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
1482dd6ac3SSilviu Baranga //
// Several instruction patterns can be used to provide this capability, and
// some of them offer higher performance than other, potentially more direct
// patterns, specifically when one micro-op reads a D-register operand that
// has recently been written as one or more S-register results.
1982dd6ac3SSilviu Baranga //
// This file defines a pre-regalloc pass which looks for SPR producers which
// are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
2382dd6ac3SSilviu Baranga //
2482dd6ac3SSilviu Baranga //===----------------------------------------------------------------------===//
2582dd6ac3SSilviu Baranga 
2682dd6ac3SSilviu Baranga #include "ARM.h"
2782dd6ac3SSilviu Baranga #include "ARMBaseInstrInfo.h"
28a9253267SCraig Topper #include "ARMBaseRegisterInfo.h"
2963b44882SEric Christopher #include "ARMSubtarget.h"
3082dd6ac3SSilviu Baranga #include "llvm/ADT/Statistic.h"
3163b44882SEric Christopher #include "llvm/CodeGen/MachineFunction.h"
3282dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineFunctionPass.h"
3382dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineInstr.h"
3482dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineInstrBuilder.h"
3582dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineRegisterInfo.h"
36b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetRegisterInfo.h"
37b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetSubtargetInfo.h"
3882dd6ac3SSilviu Baranga #include "llvm/Support/Debug.h"
39799003bfSBenjamin Kramer #include "llvm/Support/raw_ostream.h"
404c67d5a1SEric Christopher #include <map>
4182dd6ac3SSilviu Baranga #include <set>
4282dd6ac3SSilviu Baranga 
4382dd6ac3SSilviu Baranga using namespace llvm;
4482dd6ac3SSilviu Baranga 
4584e68b29SChandler Carruth #define DEBUG_TYPE "a15-sd-optimizer"
4684e68b29SChandler Carruth 
4782dd6ac3SSilviu Baranga namespace {
4882dd6ac3SSilviu Baranga   struct A15SDOptimizer : public MachineFunctionPass {
4982dd6ac3SSilviu Baranga     static char ID;
A15SDOptimizer__anon766ab5c60111::A15SDOptimizer5082dd6ac3SSilviu Baranga     A15SDOptimizer() : MachineFunctionPass(ID) {}
5182dd6ac3SSilviu Baranga 
526bc27bf3SCraig Topper     bool runOnMachineFunction(MachineFunction &Fn) override;
5382dd6ac3SSilviu Baranga 
getPassName__anon766ab5c60111::A15SDOptimizer54117296c0SMehdi Amini     StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
5582dd6ac3SSilviu Baranga 
5682dd6ac3SSilviu Baranga   private:
5782dd6ac3SSilviu Baranga     const ARMBaseInstrInfo *TII;
5882dd6ac3SSilviu Baranga     const TargetRegisterInfo *TRI;
5982dd6ac3SSilviu Baranga     MachineRegisterInfo *MRI;
6082dd6ac3SSilviu Baranga 
6182dd6ac3SSilviu Baranga     bool runOnInstruction(MachineInstr *MI);
6282dd6ac3SSilviu Baranga 
6382dd6ac3SSilviu Baranga     //
6482dd6ac3SSilviu Baranga     // Instruction builder helpers
6582dd6ac3SSilviu Baranga     //
6682dd6ac3SSilviu Baranga     unsigned createDupLane(MachineBasicBlock &MBB,
6782dd6ac3SSilviu Baranga                            MachineBasicBlock::iterator InsertBefore,
68bdc4956bSBenjamin Kramer                            const DebugLoc &DL, unsigned Reg, unsigned Lane,
6982dd6ac3SSilviu Baranga                            bool QPR = false);
7082dd6ac3SSilviu Baranga 
7182dd6ac3SSilviu Baranga     unsigned createExtractSubreg(MachineBasicBlock &MBB,
7282dd6ac3SSilviu Baranga                                  MachineBasicBlock::iterator InsertBefore,
73bdc4956bSBenjamin Kramer                                  const DebugLoc &DL, unsigned DReg,
74bdc4956bSBenjamin Kramer                                  unsigned Lane, const TargetRegisterClass *TRC);
7582dd6ac3SSilviu Baranga 
7682dd6ac3SSilviu Baranga     unsigned createVExt(MachineBasicBlock &MBB,
7782dd6ac3SSilviu Baranga                         MachineBasicBlock::iterator InsertBefore,
78bdc4956bSBenjamin Kramer                         const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
7982dd6ac3SSilviu Baranga 
8082dd6ac3SSilviu Baranga     unsigned createRegSequence(MachineBasicBlock &MBB,
8182dd6ac3SSilviu Baranga                                MachineBasicBlock::iterator InsertBefore,
82bdc4956bSBenjamin Kramer                                const DebugLoc &DL, unsigned Reg1,
83bdc4956bSBenjamin Kramer                                unsigned Reg2);
8482dd6ac3SSilviu Baranga 
8582dd6ac3SSilviu Baranga     unsigned createInsertSubreg(MachineBasicBlock &MBB,
8682dd6ac3SSilviu Baranga                                 MachineBasicBlock::iterator InsertBefore,
87bdc4956bSBenjamin Kramer                                 const DebugLoc &DL, unsigned DReg,
88bdc4956bSBenjamin Kramer                                 unsigned Lane, unsigned ToInsert);
8982dd6ac3SSilviu Baranga 
9082dd6ac3SSilviu Baranga     unsigned createImplicitDef(MachineBasicBlock &MBB,
9182dd6ac3SSilviu Baranga                                MachineBasicBlock::iterator InsertBefore,
92bdc4956bSBenjamin Kramer                                const DebugLoc &DL);
9382dd6ac3SSilviu Baranga 
9482dd6ac3SSilviu Baranga     //
9582dd6ac3SSilviu Baranga     // Various property checkers
9682dd6ac3SSilviu Baranga     //
9782dd6ac3SSilviu Baranga     bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
9882dd6ac3SSilviu Baranga     bool hasPartialWrite(MachineInstr *MI);
9982dd6ac3SSilviu Baranga     SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
10082dd6ac3SSilviu Baranga     unsigned getDPRLaneFromSPR(unsigned SReg);
10182dd6ac3SSilviu Baranga 
10282dd6ac3SSilviu Baranga     //
10382dd6ac3SSilviu Baranga     // Methods used for getting the definitions of partial registers
10482dd6ac3SSilviu Baranga     //
10582dd6ac3SSilviu Baranga 
10682dd6ac3SSilviu Baranga     MachineInstr *elideCopies(MachineInstr *MI);
10782dd6ac3SSilviu Baranga     void elideCopiesAndPHIs(MachineInstr *MI,
10882dd6ac3SSilviu Baranga                             SmallVectorImpl<MachineInstr*> &Outs);
10982dd6ac3SSilviu Baranga 
11082dd6ac3SSilviu Baranga     //
11182dd6ac3SSilviu Baranga     // Pattern optimization methods
11282dd6ac3SSilviu Baranga     //
11382dd6ac3SSilviu Baranga     unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
11482dd6ac3SSilviu Baranga     unsigned optimizeSDPattern(MachineInstr *MI);
11582dd6ac3SSilviu Baranga     unsigned getPrefSPRLane(unsigned SReg);
11682dd6ac3SSilviu Baranga 
11782dd6ac3SSilviu Baranga     //
11882dd6ac3SSilviu Baranga     // Sanitizing method - used to make sure if don't leave dead code around.
11982dd6ac3SSilviu Baranga     //
12082dd6ac3SSilviu Baranga     void eraseInstrWithNoUses(MachineInstr *MI);
12182dd6ac3SSilviu Baranga 
12282dd6ac3SSilviu Baranga     //
12382dd6ac3SSilviu Baranga     // A map used to track the changes done by this pass.
12482dd6ac3SSilviu Baranga     //
12582dd6ac3SSilviu Baranga     std::map<MachineInstr*, unsigned> Replacements;
12682dd6ac3SSilviu Baranga     std::set<MachineInstr *> DeadInstr;
12782dd6ac3SSilviu Baranga   };
12882dd6ac3SSilviu Baranga   char A15SDOptimizer::ID = 0;
12982dd6ac3SSilviu Baranga } // end anonymous namespace
13082dd6ac3SSilviu Baranga 
13182dd6ac3SSilviu Baranga // Returns true if this is a use of a SPR register.
usesRegClass(MachineOperand & MO,const TargetRegisterClass * TRC)13282dd6ac3SSilviu Baranga bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
13382dd6ac3SSilviu Baranga                                   const TargetRegisterClass *TRC) {
13482dd6ac3SSilviu Baranga   if (!MO.isReg())
13582dd6ac3SSilviu Baranga     return false;
1360c476111SDaniel Sanders   Register Reg = MO.getReg();
13782dd6ac3SSilviu Baranga 
1382bea69bfSDaniel Sanders   if (Register::isVirtualRegister(Reg))
13982dd6ac3SSilviu Baranga     return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
14082dd6ac3SSilviu Baranga   else
14182dd6ac3SSilviu Baranga     return TRC->contains(Reg);
14282dd6ac3SSilviu Baranga }
14382dd6ac3SSilviu Baranga 
getDPRLaneFromSPR(unsigned SReg)14482dd6ac3SSilviu Baranga unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
14582dd6ac3SSilviu Baranga   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
14682dd6ac3SSilviu Baranga                                            &ARM::DPRRegClass);
14782dd6ac3SSilviu Baranga   if (DReg != ARM::NoRegister) return ARM::ssub_1;
14882dd6ac3SSilviu Baranga   return ARM::ssub_0;
14982dd6ac3SSilviu Baranga }
15082dd6ac3SSilviu Baranga 
15182dd6ac3SSilviu Baranga // Get the subreg type that is most likely to be coalesced
15282dd6ac3SSilviu Baranga // for an SPR register that will be used in VDUP32d pseudo.
getPrefSPRLane(unsigned SReg)15382dd6ac3SSilviu Baranga unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
1542bea69bfSDaniel Sanders   if (!Register::isVirtualRegister(SReg))
15582dd6ac3SSilviu Baranga     return getDPRLaneFromSPR(SReg);
15682dd6ac3SSilviu Baranga 
15782dd6ac3SSilviu Baranga   MachineInstr *MI = MRI->getVRegDef(SReg);
15882dd6ac3SSilviu Baranga   if (!MI) return ARM::ssub_0;
15982dd6ac3SSilviu Baranga   MachineOperand *MO = MI->findRegisterDefOperand(SReg);
16082dd6ac3SSilviu Baranga   if (!MO) return ARM::ssub_0;
161d397e292SSimon Pilgrim   assert(MO->isReg() && "Non-register operand found!");
16282dd6ac3SSilviu Baranga 
16382dd6ac3SSilviu Baranga   if (MI->isCopy() && usesRegClass(MI->getOperand(1),
16482dd6ac3SSilviu Baranga                                     &ARM::SPRRegClass)) {
16582dd6ac3SSilviu Baranga     SReg = MI->getOperand(1).getReg();
16682dd6ac3SSilviu Baranga   }
16782dd6ac3SSilviu Baranga 
1682bea69bfSDaniel Sanders   if (Register::isVirtualRegister(SReg)) {
16982dd6ac3SSilviu Baranga     if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
17082dd6ac3SSilviu Baranga     return ARM::ssub_0;
17182dd6ac3SSilviu Baranga   }
17282dd6ac3SSilviu Baranga   return getDPRLaneFromSPR(SReg);
17382dd6ac3SSilviu Baranga }
17482dd6ac3SSilviu Baranga 
17582dd6ac3SSilviu Baranga // MI is known to be dead. Figure out what instructions
17682dd6ac3SSilviu Baranga // are also made dead by this and mark them for removal.
eraseInstrWithNoUses(MachineInstr * MI)17782dd6ac3SSilviu Baranga void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
17882dd6ac3SSilviu Baranga   SmallVector<MachineInstr *, 8> Front;
17982dd6ac3SSilviu Baranga   DeadInstr.insert(MI);
18082dd6ac3SSilviu Baranga 
181d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
18282dd6ac3SSilviu Baranga   Front.push_back(MI);
18382dd6ac3SSilviu Baranga 
18482dd6ac3SSilviu Baranga   while (Front.size() != 0) {
18584b07c9bSKazu Hirata     MI = Front.pop_back_val();
18682dd6ac3SSilviu Baranga 
18782dd6ac3SSilviu Baranga     // MI is already known to be dead. We need to see
18882dd6ac3SSilviu Baranga     // if other instructions can also be removed.
18937b22865SJaved Absar     for (MachineOperand &MO : MI->operands()) {
19082dd6ac3SSilviu Baranga       if ((!MO.isReg()) || (!MO.isUse()))
19182dd6ac3SSilviu Baranga         continue;
1920c476111SDaniel Sanders       Register Reg = MO.getReg();
1932bea69bfSDaniel Sanders       if (!Register::isVirtualRegister(Reg))
19482dd6ac3SSilviu Baranga         continue;
19582dd6ac3SSilviu Baranga       MachineOperand *Op = MI->findRegisterDefOperand(Reg);
19682dd6ac3SSilviu Baranga 
19782dd6ac3SSilviu Baranga       if (!Op)
19882dd6ac3SSilviu Baranga         continue;
19982dd6ac3SSilviu Baranga 
20082dd6ac3SSilviu Baranga       MachineInstr *Def = Op->getParent();
20182dd6ac3SSilviu Baranga 
20282dd6ac3SSilviu Baranga       // We don't need to do anything if we have already marked
20382dd6ac3SSilviu Baranga       // this instruction as being dead.
20482dd6ac3SSilviu Baranga       if (DeadInstr.find(Def) != DeadInstr.end())
20582dd6ac3SSilviu Baranga         continue;
20682dd6ac3SSilviu Baranga 
20782dd6ac3SSilviu Baranga       // Check if all the uses of this instruction are marked as
20882dd6ac3SSilviu Baranga       // dead. If so, we can also mark this instruction as being
20982dd6ac3SSilviu Baranga       // dead.
21082dd6ac3SSilviu Baranga       bool IsDead = true;
21137b22865SJaved Absar       for (MachineOperand &MODef : Def->operands()) {
21282dd6ac3SSilviu Baranga         if ((!MODef.isReg()) || (!MODef.isDef()))
21382dd6ac3SSilviu Baranga           continue;
2140c476111SDaniel Sanders         Register DefReg = MODef.getReg();
2152bea69bfSDaniel Sanders         if (!Register::isVirtualRegister(DefReg)) {
21682dd6ac3SSilviu Baranga           IsDead = false;
21782dd6ac3SSilviu Baranga           break;
21882dd6ac3SSilviu Baranga         }
21937b22865SJaved Absar         for (MachineInstr &Use : MRI->use_instructions(Reg)) {
22082dd6ac3SSilviu Baranga           // We don't care about self references.
22137b22865SJaved Absar           if (&Use == Def)
22282dd6ac3SSilviu Baranga             continue;
22337b22865SJaved Absar           if (DeadInstr.find(&Use) == DeadInstr.end()) {
22482dd6ac3SSilviu Baranga             IsDead = false;
22582dd6ac3SSilviu Baranga             break;
22682dd6ac3SSilviu Baranga           }
22782dd6ac3SSilviu Baranga         }
22882dd6ac3SSilviu Baranga       }
22982dd6ac3SSilviu Baranga 
23082dd6ac3SSilviu Baranga       if (!IsDead) continue;
23182dd6ac3SSilviu Baranga 
232d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
23382dd6ac3SSilviu Baranga       DeadInstr.insert(Def);
23482dd6ac3SSilviu Baranga     }
23582dd6ac3SSilviu Baranga   }
23682dd6ac3SSilviu Baranga }
23782dd6ac3SSilviu Baranga 
23882dd6ac3SSilviu Baranga // Creates the more optimized patterns and generally does all the code
23982dd6ac3SSilviu Baranga // transformations in this pass.
optimizeSDPattern(MachineInstr * MI)24082dd6ac3SSilviu Baranga unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
24182dd6ac3SSilviu Baranga   if (MI->isCopy()) {
24282dd6ac3SSilviu Baranga     return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
24382dd6ac3SSilviu Baranga   }
24482dd6ac3SSilviu Baranga 
24582dd6ac3SSilviu Baranga   if (MI->isInsertSubreg()) {
2460c476111SDaniel Sanders     Register DPRReg = MI->getOperand(1).getReg();
2470c476111SDaniel Sanders     Register SPRReg = MI->getOperand(2).getReg();
24882dd6ac3SSilviu Baranga 
2492bea69bfSDaniel Sanders     if (Register::isVirtualRegister(DPRReg) && Register::isVirtualRegister(SPRReg)) {
25082dd6ac3SSilviu Baranga       MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
25182dd6ac3SSilviu Baranga       MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
25282dd6ac3SSilviu Baranga 
25382dd6ac3SSilviu Baranga       if (DPRMI && SPRMI) {
25482dd6ac3SSilviu Baranga         // See if the first operand of this insert_subreg is IMPLICIT_DEF
25582dd6ac3SSilviu Baranga         MachineInstr *ECDef = elideCopies(DPRMI);
256062a2baeSCraig Topper         if (ECDef && ECDef->isImplicitDef()) {
25782dd6ac3SSilviu Baranga           // Another corner case - if we're inserting something that is purely
25882dd6ac3SSilviu Baranga           // a subreg copy of a DPR, just use that DPR.
25982dd6ac3SSilviu Baranga 
26082dd6ac3SSilviu Baranga           MachineInstr *EC = elideCopies(SPRMI);
26182dd6ac3SSilviu Baranga           // Is it a subreg copy of ssub_0?
26282dd6ac3SSilviu Baranga           if (EC && EC->isCopy() &&
26382dd6ac3SSilviu Baranga               EC->getOperand(1).getSubReg() == ARM::ssub_0) {
264d34e60caSNicola Zaghen             LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
26582dd6ac3SSilviu Baranga 
26682dd6ac3SSilviu Baranga             // Find the thing we're subreg copying out of - is it of the same
26782dd6ac3SSilviu Baranga             // regclass as DPRMI? (i.e. a DPR or QPR).
2680c476111SDaniel Sanders             Register FullReg = SPRMI->getOperand(1).getReg();
26982dd6ac3SSilviu Baranga             const TargetRegisterClass *TRC =
27082dd6ac3SSilviu Baranga               MRI->getRegClass(MI->getOperand(1).getReg());
27182dd6ac3SSilviu Baranga             if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
272d34e60caSNicola Zaghen               LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning ");
273d34e60caSNicola Zaghen               LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n");
27482dd6ac3SSilviu Baranga               eraseInstrWithNoUses(MI);
27582dd6ac3SSilviu Baranga               return FullReg;
27682dd6ac3SSilviu Baranga             }
27782dd6ac3SSilviu Baranga           }
27882dd6ac3SSilviu Baranga 
27982dd6ac3SSilviu Baranga           return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
28082dd6ac3SSilviu Baranga         }
28182dd6ac3SSilviu Baranga       }
28282dd6ac3SSilviu Baranga     }
28382dd6ac3SSilviu Baranga     return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
28482dd6ac3SSilviu Baranga   }
28582dd6ac3SSilviu Baranga 
28682dd6ac3SSilviu Baranga   if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
28782dd6ac3SSilviu Baranga                                           &ARM::SPRRegClass)) {
28882dd6ac3SSilviu Baranga     // See if all bar one of the operands are IMPLICIT_DEF and insert the
28982dd6ac3SSilviu Baranga     // optimizer pattern accordingly.
29082dd6ac3SSilviu Baranga     unsigned NumImplicit = 0, NumTotal = 0;
29182dd6ac3SSilviu Baranga     unsigned NonImplicitReg = ~0U;
29282dd6ac3SSilviu Baranga 
29382dd6ac3SSilviu Baranga     for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
29482dd6ac3SSilviu Baranga       if (!MI->getOperand(I).isReg())
29582dd6ac3SSilviu Baranga         continue;
29682dd6ac3SSilviu Baranga       ++NumTotal;
2970c476111SDaniel Sanders       Register OpReg = MI->getOperand(I).getReg();
29882dd6ac3SSilviu Baranga 
2992bea69bfSDaniel Sanders       if (!Register::isVirtualRegister(OpReg))
30082dd6ac3SSilviu Baranga         break;
30182dd6ac3SSilviu Baranga 
30282dd6ac3SSilviu Baranga       MachineInstr *Def = MRI->getVRegDef(OpReg);
30382dd6ac3SSilviu Baranga       if (!Def)
30482dd6ac3SSilviu Baranga         break;
30582dd6ac3SSilviu Baranga       if (Def->isImplicitDef())
30682dd6ac3SSilviu Baranga         ++NumImplicit;
30782dd6ac3SSilviu Baranga       else
30882dd6ac3SSilviu Baranga         NonImplicitReg = MI->getOperand(I).getReg();
30982dd6ac3SSilviu Baranga     }
31082dd6ac3SSilviu Baranga 
31182dd6ac3SSilviu Baranga     if (NumImplicit == NumTotal - 1)
31282dd6ac3SSilviu Baranga       return optimizeAllLanesPattern(MI, NonImplicitReg);
31382dd6ac3SSilviu Baranga     else
31482dd6ac3SSilviu Baranga       return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
31582dd6ac3SSilviu Baranga   }
31682dd6ac3SSilviu Baranga 
31735b2f757SCraig Topper   llvm_unreachable("Unhandled update pattern!");
31882dd6ac3SSilviu Baranga }
31982dd6ac3SSilviu Baranga 
32082dd6ac3SSilviu Baranga // Return true if this MachineInstr inserts a scalar (SPR) value into
32182dd6ac3SSilviu Baranga // a D or Q register.
hasPartialWrite(MachineInstr * MI)32282dd6ac3SSilviu Baranga bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
32382dd6ac3SSilviu Baranga   // The only way we can do a partial register update is through a COPY,
32482dd6ac3SSilviu Baranga   // INSERT_SUBREG or REG_SEQUENCE.
32582dd6ac3SSilviu Baranga   if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
32682dd6ac3SSilviu Baranga     return true;
32782dd6ac3SSilviu Baranga 
32882dd6ac3SSilviu Baranga   if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
32982dd6ac3SSilviu Baranga                                            &ARM::SPRRegClass))
33082dd6ac3SSilviu Baranga     return true;
33182dd6ac3SSilviu Baranga 
33282dd6ac3SSilviu Baranga   if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
33382dd6ac3SSilviu Baranga     return true;
33482dd6ac3SSilviu Baranga 
33582dd6ac3SSilviu Baranga   return false;
33682dd6ac3SSilviu Baranga }
33782dd6ac3SSilviu Baranga 
33882dd6ac3SSilviu Baranga // Looks through full copies to get the instruction that defines the input
33982dd6ac3SSilviu Baranga // operand for MI.
elideCopies(MachineInstr * MI)34082dd6ac3SSilviu Baranga MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
34182dd6ac3SSilviu Baranga   if (!MI->isFullCopy())
34282dd6ac3SSilviu Baranga     return MI;
3432bea69bfSDaniel Sanders   if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
344062a2baeSCraig Topper     return nullptr;
34582dd6ac3SSilviu Baranga   MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
34682dd6ac3SSilviu Baranga   if (!Def)
347062a2baeSCraig Topper     return nullptr;
34882dd6ac3SSilviu Baranga   return elideCopies(Def);
34982dd6ac3SSilviu Baranga }
35082dd6ac3SSilviu Baranga 
35182dd6ac3SSilviu Baranga // Look through full copies and PHIs to get the set of non-copy MachineInstrs
35282dd6ac3SSilviu Baranga // that can produce MI.
elideCopiesAndPHIs(MachineInstr * MI,SmallVectorImpl<MachineInstr * > & Outs)35382dd6ac3SSilviu Baranga void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
35482dd6ac3SSilviu Baranga                                         SmallVectorImpl<MachineInstr*> &Outs) {
35582dd6ac3SSilviu Baranga    // Looking through PHIs may create loops so we need to track what
35682dd6ac3SSilviu Baranga    // instructions we have visited before.
35782dd6ac3SSilviu Baranga    std::set<MachineInstr *> Reached;
35882dd6ac3SSilviu Baranga    SmallVector<MachineInstr *, 8> Front;
35982dd6ac3SSilviu Baranga    Front.push_back(MI);
36082dd6ac3SSilviu Baranga    while (Front.size() != 0) {
36116baad8fSKazu Hirata      MI = Front.pop_back_val();
36282dd6ac3SSilviu Baranga 
36382dd6ac3SSilviu Baranga      // If we have already explored this MachineInstr, ignore it.
364*437f9600SKazu Hirata      if (!Reached.insert(MI).second)
36582dd6ac3SSilviu Baranga        continue;
36682dd6ac3SSilviu Baranga      if (MI->isPHI()) {
36782dd6ac3SSilviu Baranga        for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
3680c476111SDaniel Sanders          Register Reg = MI->getOperand(I).getReg();
3692bea69bfSDaniel Sanders          if (!Register::isVirtualRegister(Reg)) {
37082dd6ac3SSilviu Baranga            continue;
37182dd6ac3SSilviu Baranga          }
37282dd6ac3SSilviu Baranga          MachineInstr *NewMI = MRI->getVRegDef(Reg);
37382dd6ac3SSilviu Baranga          if (!NewMI)
37482dd6ac3SSilviu Baranga            continue;
37582dd6ac3SSilviu Baranga          Front.push_back(NewMI);
37682dd6ac3SSilviu Baranga        }
37782dd6ac3SSilviu Baranga      } else if (MI->isFullCopy()) {
3782bea69bfSDaniel Sanders        if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
37982dd6ac3SSilviu Baranga          continue;
38082dd6ac3SSilviu Baranga        MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
38182dd6ac3SSilviu Baranga        if (!NewMI)
38282dd6ac3SSilviu Baranga          continue;
38382dd6ac3SSilviu Baranga        Front.push_back(NewMI);
38482dd6ac3SSilviu Baranga      } else {
385d34e60caSNicola Zaghen        LLVM_DEBUG(dbgs() << "Found partial copy" << *MI << "\n");
38682dd6ac3SSilviu Baranga        Outs.push_back(MI);
38782dd6ac3SSilviu Baranga      }
38882dd6ac3SSilviu Baranga    }
38982dd6ac3SSilviu Baranga }
39082dd6ac3SSilviu Baranga 
39182dd6ac3SSilviu Baranga // Return the DPR virtual registers that are read by this machine instruction
39282dd6ac3SSilviu Baranga // (if any).
getReadDPRs(MachineInstr * MI)39382dd6ac3SSilviu Baranga SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
39482dd6ac3SSilviu Baranga   if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
39582dd6ac3SSilviu Baranga       MI->isKill())
39682dd6ac3SSilviu Baranga     return SmallVector<unsigned, 8>();
39782dd6ac3SSilviu Baranga 
39882dd6ac3SSilviu Baranga   SmallVector<unsigned, 8> Defs;
39937b22865SJaved Absar   for (MachineOperand &MO : MI->operands()) {
40082dd6ac3SSilviu Baranga     if (!MO.isReg() || !MO.isUse())
40182dd6ac3SSilviu Baranga       continue;
40282dd6ac3SSilviu Baranga     if (!usesRegClass(MO, &ARM::DPRRegClass) &&
40340b5ab8eSHao Liu         !usesRegClass(MO, &ARM::QPRRegClass) &&
40440b5ab8eSHao Liu         !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
40582dd6ac3SSilviu Baranga       continue;
40682dd6ac3SSilviu Baranga 
40782dd6ac3SSilviu Baranga     Defs.push_back(MO.getReg());
40882dd6ac3SSilviu Baranga   }
40982dd6ac3SSilviu Baranga   return Defs;
41082dd6ac3SSilviu Baranga }
41182dd6ac3SSilviu Baranga 
41282dd6ac3SSilviu Baranga // Creates a DPR register from an SPR one by using a VDUP.
createDupLane(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg,unsigned Lane,bool QPR)413bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
41482dd6ac3SSilviu Baranga                                        MachineBasicBlock::iterator InsertBefore,
415bdc4956bSBenjamin Kramer                                        const DebugLoc &DL, unsigned Reg,
416bdc4956bSBenjamin Kramer                                        unsigned Lane, bool QPR) {
4170c476111SDaniel Sanders   Register Out =
4180c476111SDaniel Sanders       MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass);
4194f8c3e18SDiana Picus   BuildMI(MBB, InsertBefore, DL,
4204f8c3e18SDiana Picus           TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
42182dd6ac3SSilviu Baranga       .addReg(Reg)
4224f8c3e18SDiana Picus       .addImm(Lane)
4234f8c3e18SDiana Picus       .add(predOps(ARMCC::AL));
42482dd6ac3SSilviu Baranga 
42582dd6ac3SSilviu Baranga   return Out;
42682dd6ac3SSilviu Baranga }
42782dd6ac3SSilviu Baranga 
42882dd6ac3SSilviu Baranga // Creates a SPR register from a DPR by copying the value in lane 0.
createExtractSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,const TargetRegisterClass * TRC)429bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createExtractSubreg(
430bdc4956bSBenjamin Kramer     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
431bdc4956bSBenjamin Kramer     const DebugLoc &DL, unsigned DReg, unsigned Lane,
43282dd6ac3SSilviu Baranga     const TargetRegisterClass *TRC) {
4330c476111SDaniel Sanders   Register Out = MRI->createVirtualRegister(TRC);
43482dd6ac3SSilviu Baranga   BuildMI(MBB,
43582dd6ac3SSilviu Baranga           InsertBefore,
43682dd6ac3SSilviu Baranga           DL,
43782dd6ac3SSilviu Baranga           TII->get(TargetOpcode::COPY), Out)
43882dd6ac3SSilviu Baranga     .addReg(DReg, 0, Lane);
43982dd6ac3SSilviu Baranga 
44082dd6ac3SSilviu Baranga   return Out;
44182dd6ac3SSilviu Baranga }
44282dd6ac3SSilviu Baranga 
44382dd6ac3SSilviu Baranga // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
createRegSequence(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg1,unsigned Reg2)444bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createRegSequence(
445bdc4956bSBenjamin Kramer     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
446bdc4956bSBenjamin Kramer     const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
4470c476111SDaniel Sanders   Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
44882dd6ac3SSilviu Baranga   BuildMI(MBB,
44982dd6ac3SSilviu Baranga           InsertBefore,
45082dd6ac3SSilviu Baranga           DL,
45182dd6ac3SSilviu Baranga           TII->get(TargetOpcode::REG_SEQUENCE), Out)
45282dd6ac3SSilviu Baranga     .addReg(Reg1)
45382dd6ac3SSilviu Baranga     .addImm(ARM::dsub_0)
45482dd6ac3SSilviu Baranga     .addReg(Reg2)
45582dd6ac3SSilviu Baranga     .addImm(ARM::dsub_1);
45682dd6ac3SSilviu Baranga   return Out;
45782dd6ac3SSilviu Baranga }
45882dd6ac3SSilviu Baranga 
45982dd6ac3SSilviu Baranga // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
46082dd6ac3SSilviu Baranga // and merges them into one DPR register.
createVExt(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Ssub0,unsigned Ssub1)461bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
46282dd6ac3SSilviu Baranga                                     MachineBasicBlock::iterator InsertBefore,
463bdc4956bSBenjamin Kramer                                     const DebugLoc &DL, unsigned Ssub0,
464bdc4956bSBenjamin Kramer                                     unsigned Ssub1) {
4650c476111SDaniel Sanders   Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
4664f8c3e18SDiana Picus   BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
46782dd6ac3SSilviu Baranga       .addReg(Ssub0)
46882dd6ac3SSilviu Baranga       .addReg(Ssub1)
4694f8c3e18SDiana Picus       .addImm(1)
4704f8c3e18SDiana Picus       .add(predOps(ARMCC::AL));
47182dd6ac3SSilviu Baranga   return Out;
47282dd6ac3SSilviu Baranga }
47382dd6ac3SSilviu Baranga 
createInsertSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,unsigned ToInsert)474bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createInsertSubreg(
475bdc4956bSBenjamin Kramer     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
476bdc4956bSBenjamin Kramer     const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
4770c476111SDaniel Sanders   Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
47882dd6ac3SSilviu Baranga   BuildMI(MBB,
47982dd6ac3SSilviu Baranga           InsertBefore,
48082dd6ac3SSilviu Baranga           DL,
48182dd6ac3SSilviu Baranga           TII->get(TargetOpcode::INSERT_SUBREG), Out)
48282dd6ac3SSilviu Baranga     .addReg(DReg)
48382dd6ac3SSilviu Baranga     .addReg(ToInsert)
48482dd6ac3SSilviu Baranga     .addImm(Lane);
48582dd6ac3SSilviu Baranga 
48682dd6ac3SSilviu Baranga   return Out;
48782dd6ac3SSilviu Baranga }
48882dd6ac3SSilviu Baranga 
48982dd6ac3SSilviu Baranga unsigned
createImplicitDef(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL)49082dd6ac3SSilviu Baranga A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
49182dd6ac3SSilviu Baranga                                   MachineBasicBlock::iterator InsertBefore,
492bdc4956bSBenjamin Kramer                                   const DebugLoc &DL) {
4930c476111SDaniel Sanders   Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
49482dd6ac3SSilviu Baranga   BuildMI(MBB,
49582dd6ac3SSilviu Baranga           InsertBefore,
49682dd6ac3SSilviu Baranga           DL,
49782dd6ac3SSilviu Baranga           TII->get(TargetOpcode::IMPLICIT_DEF), Out);
49882dd6ac3SSilviu Baranga   return Out;
49982dd6ac3SSilviu Baranga }
50082dd6ac3SSilviu Baranga 
50182dd6ac3SSilviu Baranga // This function inserts instructions in order to optimize interactions between
50282dd6ac3SSilviu Baranga // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
50382dd6ac3SSilviu Baranga // lanes, and the using VEXT instructions to recompose the result.
50482dd6ac3SSilviu Baranga unsigned
optimizeAllLanesPattern(MachineInstr * MI,unsigned Reg)50582dd6ac3SSilviu Baranga A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
50682dd6ac3SSilviu Baranga   MachineBasicBlock::iterator InsertPt(MI);
50782dd6ac3SSilviu Baranga   DebugLoc DL = MI->getDebugLoc();
50882dd6ac3SSilviu Baranga   MachineBasicBlock &MBB = *MI->getParent();
50982dd6ac3SSilviu Baranga   InsertPt++;
51082dd6ac3SSilviu Baranga   unsigned Out;
51182dd6ac3SSilviu Baranga 
51240b5ab8eSHao Liu   // DPair has the same length as QPR and also has two DPRs as subreg.
51340b5ab8eSHao Liu   // Treat DPair as QPR.
51440b5ab8eSHao Liu   if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
51540b5ab8eSHao Liu       MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
51682dd6ac3SSilviu Baranga     unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
51782dd6ac3SSilviu Baranga                                          ARM::dsub_0, &ARM::DPRRegClass);
51882dd6ac3SSilviu Baranga     unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
51982dd6ac3SSilviu Baranga                                          ARM::dsub_1, &ARM::DPRRegClass);
52082dd6ac3SSilviu Baranga 
52182dd6ac3SSilviu Baranga     unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
52282dd6ac3SSilviu Baranga     unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
52382dd6ac3SSilviu Baranga     Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
52482dd6ac3SSilviu Baranga 
52582dd6ac3SSilviu Baranga     unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
52682dd6ac3SSilviu Baranga     unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
52782dd6ac3SSilviu Baranga     Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
52882dd6ac3SSilviu Baranga 
52982dd6ac3SSilviu Baranga     Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
53082dd6ac3SSilviu Baranga 
53182dd6ac3SSilviu Baranga   } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
53282dd6ac3SSilviu Baranga     unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
53382dd6ac3SSilviu Baranga     unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
53482dd6ac3SSilviu Baranga     Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
53582dd6ac3SSilviu Baranga 
53682dd6ac3SSilviu Baranga   } else {
53782dd6ac3SSilviu Baranga     assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
53882dd6ac3SSilviu Baranga            "Found unexpected regclass!");
53982dd6ac3SSilviu Baranga 
54082dd6ac3SSilviu Baranga     unsigned PrefLane = getPrefSPRLane(Reg);
54182dd6ac3SSilviu Baranga     unsigned Lane;
54282dd6ac3SSilviu Baranga     switch (PrefLane) {
54382dd6ac3SSilviu Baranga       case ARM::ssub_0: Lane = 0; break;
54482dd6ac3SSilviu Baranga       case ARM::ssub_1: Lane = 1; break;
54582dd6ac3SSilviu Baranga       default: llvm_unreachable("Unknown preferred lane!");
54682dd6ac3SSilviu Baranga     }
54782dd6ac3SSilviu Baranga 
54840b5ab8eSHao Liu     // Treat DPair as QPR
54940b5ab8eSHao Liu     bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
55040b5ab8eSHao Liu                    usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
55182dd6ac3SSilviu Baranga 
55282dd6ac3SSilviu Baranga     Out = createImplicitDef(MBB, InsertPt, DL);
55382dd6ac3SSilviu Baranga     Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
55482dd6ac3SSilviu Baranga     Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
55582dd6ac3SSilviu Baranga     eraseInstrWithNoUses(MI);
55682dd6ac3SSilviu Baranga   }
55782dd6ac3SSilviu Baranga   return Out;
55882dd6ac3SSilviu Baranga }
55982dd6ac3SSilviu Baranga 
runOnInstruction(MachineInstr * MI)56082dd6ac3SSilviu Baranga bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
56182dd6ac3SSilviu Baranga   // We look for instructions that write S registers that are then read as
56282dd6ac3SSilviu Baranga   // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
56382dd6ac3SSilviu Baranga   // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
56482dd6ac3SSilviu Baranga   // merge two SPR values to form a DPR register.  In order avoid false
56582dd6ac3SSilviu Baranga   // positives we make sure that there is an SPR producer so we look past
56682dd6ac3SSilviu Baranga   // COPY and PHI nodes to find it.
56782dd6ac3SSilviu Baranga   //
56882dd6ac3SSilviu Baranga   // The best code pattern for when an SPR producer is going to be used by a
56982dd6ac3SSilviu Baranga   // DPR or QPR consumer depends on whether the other lanes of the
57082dd6ac3SSilviu Baranga   // corresponding DPR/QPR are currently defined.
57182dd6ac3SSilviu Baranga   //
57282dd6ac3SSilviu Baranga   // We can handle these efficiently, depending on the type of
57382dd6ac3SSilviu Baranga   // pseudo-instruction that is producing the pattern
57482dd6ac3SSilviu Baranga   //
57582dd6ac3SSilviu Baranga   //   * COPY:          * VDUP all lanes and merge the results together
57682dd6ac3SSilviu Baranga   //                      using VEXTs.
57782dd6ac3SSilviu Baranga   //
57882dd6ac3SSilviu Baranga   //   * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
57982dd6ac3SSilviu Baranga   //                      lane, and the other lane(s) of the DPR/QPR register
58082dd6ac3SSilviu Baranga   //                      that we are inserting in are undefined, use the
58182dd6ac3SSilviu Baranga   //                      original DPR/QPR value.
58282dd6ac3SSilviu Baranga   //                    * Otherwise, fall back on the same stategy as COPY.
58382dd6ac3SSilviu Baranga   //
58482dd6ac3SSilviu Baranga   //   * REG_SEQUENCE:  * If all except one of the input operands are
58582dd6ac3SSilviu Baranga   //                      IMPLICIT_DEFs, insert the VDUP pattern for just the
58682dd6ac3SSilviu Baranga   //                      defined input operand
58782dd6ac3SSilviu Baranga   //                    * Otherwise, fall back on the same stategy as COPY.
58882dd6ac3SSilviu Baranga   //
58982dd6ac3SSilviu Baranga 
59082dd6ac3SSilviu Baranga   // First, get all the reads of D-registers done by this instruction.
59182dd6ac3SSilviu Baranga   SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
59282dd6ac3SSilviu Baranga   bool Modified = false;
59382dd6ac3SSilviu Baranga 
594c5cf7d91SKazu Hirata   for (unsigned I : Defs) {
59582dd6ac3SSilviu Baranga     // Follow the def-use chain for this DPR through COPYs, and also through
59682dd6ac3SSilviu Baranga     // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
59782dd6ac3SSilviu Baranga     // we can end up with multiple defs of this DPR.
59882dd6ac3SSilviu Baranga 
59982dd6ac3SSilviu Baranga     SmallVector<MachineInstr *, 8> DefSrcs;
600c5cf7d91SKazu Hirata     if (!Register::isVirtualRegister(I))
60182dd6ac3SSilviu Baranga       continue;
602c5cf7d91SKazu Hirata     MachineInstr *Def = MRI->getVRegDef(I);
60382dd6ac3SSilviu Baranga     if (!Def)
60482dd6ac3SSilviu Baranga       continue;
60582dd6ac3SSilviu Baranga 
60682dd6ac3SSilviu Baranga     elideCopiesAndPHIs(Def, DefSrcs);
60782dd6ac3SSilviu Baranga 
60837b22865SJaved Absar     for (MachineInstr *MI : DefSrcs) {
60982dd6ac3SSilviu Baranga       // If we've already analyzed and replaced this operand, don't do
61082dd6ac3SSilviu Baranga       // anything.
61182dd6ac3SSilviu Baranga       if (Replacements.find(MI) != Replacements.end())
61282dd6ac3SSilviu Baranga         continue;
61382dd6ac3SSilviu Baranga 
61482dd6ac3SSilviu Baranga       // Now, work out if the instruction causes a SPR->DPR dependency.
61582dd6ac3SSilviu Baranga       if (!hasPartialWrite(MI))
61682dd6ac3SSilviu Baranga         continue;
61782dd6ac3SSilviu Baranga 
61882dd6ac3SSilviu Baranga       // Collect all the uses of this MI's DPR def for updating later.
61982dd6ac3SSilviu Baranga       SmallVector<MachineOperand*, 8> Uses;
6200c476111SDaniel Sanders       Register DPRDefReg = MI->getOperand(0).getReg();
6212ca45adfSKazu Hirata       for (MachineOperand &MO : MRI->use_operands(DPRDefReg))
6222ca45adfSKazu Hirata         Uses.push_back(&MO);
62382dd6ac3SSilviu Baranga 
62482dd6ac3SSilviu Baranga       // We can optimize this.
62582dd6ac3SSilviu Baranga       unsigned NewReg = optimizeSDPattern(MI);
62682dd6ac3SSilviu Baranga 
62782dd6ac3SSilviu Baranga       if (NewReg != 0) {
62882dd6ac3SSilviu Baranga         Modified = true;
629c5cf7d91SKazu Hirata         for (MachineOperand *Use : Uses) {
63013654dd3SJim Grosbach           // Make sure to constrain the register class of the new register to
63113654dd3SJim Grosbach           // match what we're replacing. Otherwise we can optimize a DPR_VFP2
63213654dd3SJim Grosbach           // reference into a plain DPR, and that will end poorly. NewReg is
63313654dd3SJim Grosbach           // always virtual here, so there will always be a matching subclass
63413654dd3SJim Grosbach           // to find.
635c5cf7d91SKazu Hirata           MRI->constrainRegClass(NewReg, MRI->getRegClass(Use->getReg()));
63613654dd3SJim Grosbach 
637c5cf7d91SKazu Hirata           LLVM_DEBUG(dbgs() << "Replacing operand " << *Use << " with "
6389d419d3bSFrancis Visoiu Mistrih                             << printReg(NewReg) << "\n");
639c5cf7d91SKazu Hirata           Use->substVirtReg(NewReg, 0, *TRI);
64082dd6ac3SSilviu Baranga         }
64182dd6ac3SSilviu Baranga       }
64282dd6ac3SSilviu Baranga       Replacements[MI] = NewReg;
64382dd6ac3SSilviu Baranga     }
64482dd6ac3SSilviu Baranga   }
64582dd6ac3SSilviu Baranga   return Modified;
64682dd6ac3SSilviu Baranga }
64782dd6ac3SSilviu Baranga 
runOnMachineFunction(MachineFunction & Fn)64882dd6ac3SSilviu Baranga bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
649f1caa283SMatthias Braun   if (skipFunction(Fn.getFunction()))
650a2b9111eSAndrew Kaylor     return false;
651a2b9111eSAndrew Kaylor 
65263b44882SEric Christopher   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
65363b44882SEric Christopher   // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
65463b44882SEric Christopher   // enabled when NEON is available.
655fffa9b58SEvandro Menezes   if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
65663b44882SEric Christopher     return false;
657fffa9b58SEvandro Menezes 
65863b44882SEric Christopher   TII = STI.getInstrInfo();
65963b44882SEric Christopher   TRI = STI.getRegisterInfo();
66082dd6ac3SSilviu Baranga   MRI = &Fn.getRegInfo();
66182dd6ac3SSilviu Baranga   bool Modified = false;
66282dd6ac3SSilviu Baranga 
663d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
66482dd6ac3SSilviu Baranga 
66582dd6ac3SSilviu Baranga   DeadInstr.clear();
66682dd6ac3SSilviu Baranga   Replacements.clear();
66782dd6ac3SSilviu Baranga 
66837b22865SJaved Absar   for (MachineBasicBlock &MBB : Fn) {
66937b22865SJaved Absar     for (MachineInstr &MI : MBB) {
67037b22865SJaved Absar       Modified |= runOnInstruction(&MI);
67137b22865SJaved Absar     }
67282dd6ac3SSilviu Baranga   }
67382dd6ac3SSilviu Baranga 
67437b22865SJaved Absar   for (MachineInstr *MI : DeadInstr) {
67537b22865SJaved Absar     MI->eraseFromParent();
67682dd6ac3SSilviu Baranga   }
67782dd6ac3SSilviu Baranga 
67882dd6ac3SSilviu Baranga   return Modified;
67982dd6ac3SSilviu Baranga }
68082dd6ac3SSilviu Baranga 
createA15SDOptimizerPass()68182dd6ac3SSilviu Baranga FunctionPass *llvm::createA15SDOptimizerPass() {
68282dd6ac3SSilviu Baranga   return new A15SDOptimizer();
68382dd6ac3SSilviu Baranga }
684