182dd6ac3SSilviu Baranga //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
282dd6ac3SSilviu Baranga //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
682dd6ac3SSilviu Baranga //
782dd6ac3SSilviu Baranga //===----------------------------------------------------------------------===//
882dd6ac3SSilviu Baranga //
982dd6ac3SSilviu Baranga // The Cortex-A15 processor employs a tracking scheme in its register renaming
1082dd6ac3SSilviu Baranga // in order to process each instruction's micro-ops speculatively and
1182dd6ac3SSilviu Baranga // out-of-order with appropriate forwarding. The ARM architecture allows VFP
1282dd6ac3SSilviu Baranga // instructions to read and write 32-bit S-registers. Each S-register
1382dd6ac3SSilviu Baranga // corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
1482dd6ac3SSilviu Baranga //
1582dd6ac3SSilviu Baranga // There are several instruction patterns which can be used to provide this
1682dd6ac3SSilviu Baranga // capability which can provide higher performance than other, potentially more
1782dd6ac3SSilviu Baranga // direct patterns, specifically around when one micro-op reads a D-register
1882dd6ac3SSilviu Baranga // operand that has recently been written as one or more S-register results.
1982dd6ac3SSilviu Baranga //
// This file defines a pre-regalloc pass which looks for SPR producers which
// are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
2382dd6ac3SSilviu Baranga //
2482dd6ac3SSilviu Baranga //===----------------------------------------------------------------------===//
2582dd6ac3SSilviu Baranga
2682dd6ac3SSilviu Baranga #include "ARM.h"
2782dd6ac3SSilviu Baranga #include "ARMBaseInstrInfo.h"
28a9253267SCraig Topper #include "ARMBaseRegisterInfo.h"
2963b44882SEric Christopher #include "ARMSubtarget.h"
3082dd6ac3SSilviu Baranga #include "llvm/ADT/Statistic.h"
3163b44882SEric Christopher #include "llvm/CodeGen/MachineFunction.h"
3282dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineFunctionPass.h"
3382dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineInstr.h"
3482dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineInstrBuilder.h"
3582dd6ac3SSilviu Baranga #include "llvm/CodeGen/MachineRegisterInfo.h"
36b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetRegisterInfo.h"
37b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetSubtargetInfo.h"
3882dd6ac3SSilviu Baranga #include "llvm/Support/Debug.h"
39799003bfSBenjamin Kramer #include "llvm/Support/raw_ostream.h"
404c67d5a1SEric Christopher #include <map>
4182dd6ac3SSilviu Baranga #include <set>
4282dd6ac3SSilviu Baranga
4382dd6ac3SSilviu Baranga using namespace llvm;
4482dd6ac3SSilviu Baranga
4584e68b29SChandler Carruth #define DEBUG_TYPE "a15-sd-optimizer"
4684e68b29SChandler Carruth
4782dd6ac3SSilviu Baranga namespace {
4882dd6ac3SSilviu Baranga struct A15SDOptimizer : public MachineFunctionPass {
4982dd6ac3SSilviu Baranga static char ID;
A15SDOptimizer__anon766ab5c60111::A15SDOptimizer5082dd6ac3SSilviu Baranga A15SDOptimizer() : MachineFunctionPass(ID) {}
5182dd6ac3SSilviu Baranga
526bc27bf3SCraig Topper bool runOnMachineFunction(MachineFunction &Fn) override;
5382dd6ac3SSilviu Baranga
getPassName__anon766ab5c60111::A15SDOptimizer54117296c0SMehdi Amini StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
5582dd6ac3SSilviu Baranga
5682dd6ac3SSilviu Baranga private:
5782dd6ac3SSilviu Baranga const ARMBaseInstrInfo *TII;
5882dd6ac3SSilviu Baranga const TargetRegisterInfo *TRI;
5982dd6ac3SSilviu Baranga MachineRegisterInfo *MRI;
6082dd6ac3SSilviu Baranga
6182dd6ac3SSilviu Baranga bool runOnInstruction(MachineInstr *MI);
6282dd6ac3SSilviu Baranga
6382dd6ac3SSilviu Baranga //
6482dd6ac3SSilviu Baranga // Instruction builder helpers
6582dd6ac3SSilviu Baranga //
6682dd6ac3SSilviu Baranga unsigned createDupLane(MachineBasicBlock &MBB,
6782dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
68bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned Reg, unsigned Lane,
6982dd6ac3SSilviu Baranga bool QPR = false);
7082dd6ac3SSilviu Baranga
7182dd6ac3SSilviu Baranga unsigned createExtractSubreg(MachineBasicBlock &MBB,
7282dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
73bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned DReg,
74bdc4956bSBenjamin Kramer unsigned Lane, const TargetRegisterClass *TRC);
7582dd6ac3SSilviu Baranga
7682dd6ac3SSilviu Baranga unsigned createVExt(MachineBasicBlock &MBB,
7782dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
78bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
7982dd6ac3SSilviu Baranga
8082dd6ac3SSilviu Baranga unsigned createRegSequence(MachineBasicBlock &MBB,
8182dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
82bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned Reg1,
83bdc4956bSBenjamin Kramer unsigned Reg2);
8482dd6ac3SSilviu Baranga
8582dd6ac3SSilviu Baranga unsigned createInsertSubreg(MachineBasicBlock &MBB,
8682dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
87bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned DReg,
88bdc4956bSBenjamin Kramer unsigned Lane, unsigned ToInsert);
8982dd6ac3SSilviu Baranga
9082dd6ac3SSilviu Baranga unsigned createImplicitDef(MachineBasicBlock &MBB,
9182dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
92bdc4956bSBenjamin Kramer const DebugLoc &DL);
9382dd6ac3SSilviu Baranga
9482dd6ac3SSilviu Baranga //
9582dd6ac3SSilviu Baranga // Various property checkers
9682dd6ac3SSilviu Baranga //
9782dd6ac3SSilviu Baranga bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
9882dd6ac3SSilviu Baranga bool hasPartialWrite(MachineInstr *MI);
9982dd6ac3SSilviu Baranga SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
10082dd6ac3SSilviu Baranga unsigned getDPRLaneFromSPR(unsigned SReg);
10182dd6ac3SSilviu Baranga
10282dd6ac3SSilviu Baranga //
10382dd6ac3SSilviu Baranga // Methods used for getting the definitions of partial registers
10482dd6ac3SSilviu Baranga //
10582dd6ac3SSilviu Baranga
10682dd6ac3SSilviu Baranga MachineInstr *elideCopies(MachineInstr *MI);
10782dd6ac3SSilviu Baranga void elideCopiesAndPHIs(MachineInstr *MI,
10882dd6ac3SSilviu Baranga SmallVectorImpl<MachineInstr*> &Outs);
10982dd6ac3SSilviu Baranga
11082dd6ac3SSilviu Baranga //
11182dd6ac3SSilviu Baranga // Pattern optimization methods
11282dd6ac3SSilviu Baranga //
11382dd6ac3SSilviu Baranga unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
11482dd6ac3SSilviu Baranga unsigned optimizeSDPattern(MachineInstr *MI);
11582dd6ac3SSilviu Baranga unsigned getPrefSPRLane(unsigned SReg);
11682dd6ac3SSilviu Baranga
11782dd6ac3SSilviu Baranga //
11882dd6ac3SSilviu Baranga // Sanitizing method - used to make sure if don't leave dead code around.
11982dd6ac3SSilviu Baranga //
12082dd6ac3SSilviu Baranga void eraseInstrWithNoUses(MachineInstr *MI);
12182dd6ac3SSilviu Baranga
12282dd6ac3SSilviu Baranga //
12382dd6ac3SSilviu Baranga // A map used to track the changes done by this pass.
12482dd6ac3SSilviu Baranga //
12582dd6ac3SSilviu Baranga std::map<MachineInstr*, unsigned> Replacements;
12682dd6ac3SSilviu Baranga std::set<MachineInstr *> DeadInstr;
12782dd6ac3SSilviu Baranga };
12882dd6ac3SSilviu Baranga char A15SDOptimizer::ID = 0;
12982dd6ac3SSilviu Baranga } // end anonymous namespace
13082dd6ac3SSilviu Baranga
13182dd6ac3SSilviu Baranga // Returns true if this is a use of a SPR register.
usesRegClass(MachineOperand & MO,const TargetRegisterClass * TRC)13282dd6ac3SSilviu Baranga bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
13382dd6ac3SSilviu Baranga const TargetRegisterClass *TRC) {
13482dd6ac3SSilviu Baranga if (!MO.isReg())
13582dd6ac3SSilviu Baranga return false;
1360c476111SDaniel Sanders Register Reg = MO.getReg();
13782dd6ac3SSilviu Baranga
1382bea69bfSDaniel Sanders if (Register::isVirtualRegister(Reg))
13982dd6ac3SSilviu Baranga return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
14082dd6ac3SSilviu Baranga else
14182dd6ac3SSilviu Baranga return TRC->contains(Reg);
14282dd6ac3SSilviu Baranga }
14382dd6ac3SSilviu Baranga
getDPRLaneFromSPR(unsigned SReg)14482dd6ac3SSilviu Baranga unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
14582dd6ac3SSilviu Baranga unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
14682dd6ac3SSilviu Baranga &ARM::DPRRegClass);
14782dd6ac3SSilviu Baranga if (DReg != ARM::NoRegister) return ARM::ssub_1;
14882dd6ac3SSilviu Baranga return ARM::ssub_0;
14982dd6ac3SSilviu Baranga }
15082dd6ac3SSilviu Baranga
15182dd6ac3SSilviu Baranga // Get the subreg type that is most likely to be coalesced
15282dd6ac3SSilviu Baranga // for an SPR register that will be used in VDUP32d pseudo.
getPrefSPRLane(unsigned SReg)15382dd6ac3SSilviu Baranga unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
1542bea69bfSDaniel Sanders if (!Register::isVirtualRegister(SReg))
15582dd6ac3SSilviu Baranga return getDPRLaneFromSPR(SReg);
15682dd6ac3SSilviu Baranga
15782dd6ac3SSilviu Baranga MachineInstr *MI = MRI->getVRegDef(SReg);
15882dd6ac3SSilviu Baranga if (!MI) return ARM::ssub_0;
15982dd6ac3SSilviu Baranga MachineOperand *MO = MI->findRegisterDefOperand(SReg);
16082dd6ac3SSilviu Baranga if (!MO) return ARM::ssub_0;
161d397e292SSimon Pilgrim assert(MO->isReg() && "Non-register operand found!");
16282dd6ac3SSilviu Baranga
16382dd6ac3SSilviu Baranga if (MI->isCopy() && usesRegClass(MI->getOperand(1),
16482dd6ac3SSilviu Baranga &ARM::SPRRegClass)) {
16582dd6ac3SSilviu Baranga SReg = MI->getOperand(1).getReg();
16682dd6ac3SSilviu Baranga }
16782dd6ac3SSilviu Baranga
1682bea69bfSDaniel Sanders if (Register::isVirtualRegister(SReg)) {
16982dd6ac3SSilviu Baranga if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
17082dd6ac3SSilviu Baranga return ARM::ssub_0;
17182dd6ac3SSilviu Baranga }
17282dd6ac3SSilviu Baranga return getDPRLaneFromSPR(SReg);
17382dd6ac3SSilviu Baranga }
17482dd6ac3SSilviu Baranga
17582dd6ac3SSilviu Baranga // MI is known to be dead. Figure out what instructions
17682dd6ac3SSilviu Baranga // are also made dead by this and mark them for removal.
eraseInstrWithNoUses(MachineInstr * MI)17782dd6ac3SSilviu Baranga void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
17882dd6ac3SSilviu Baranga SmallVector<MachineInstr *, 8> Front;
17982dd6ac3SSilviu Baranga DeadInstr.insert(MI);
18082dd6ac3SSilviu Baranga
181d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
18282dd6ac3SSilviu Baranga Front.push_back(MI);
18382dd6ac3SSilviu Baranga
18482dd6ac3SSilviu Baranga while (Front.size() != 0) {
18584b07c9bSKazu Hirata MI = Front.pop_back_val();
18682dd6ac3SSilviu Baranga
18782dd6ac3SSilviu Baranga // MI is already known to be dead. We need to see
18882dd6ac3SSilviu Baranga // if other instructions can also be removed.
18937b22865SJaved Absar for (MachineOperand &MO : MI->operands()) {
19082dd6ac3SSilviu Baranga if ((!MO.isReg()) || (!MO.isUse()))
19182dd6ac3SSilviu Baranga continue;
1920c476111SDaniel Sanders Register Reg = MO.getReg();
1932bea69bfSDaniel Sanders if (!Register::isVirtualRegister(Reg))
19482dd6ac3SSilviu Baranga continue;
19582dd6ac3SSilviu Baranga MachineOperand *Op = MI->findRegisterDefOperand(Reg);
19682dd6ac3SSilviu Baranga
19782dd6ac3SSilviu Baranga if (!Op)
19882dd6ac3SSilviu Baranga continue;
19982dd6ac3SSilviu Baranga
20082dd6ac3SSilviu Baranga MachineInstr *Def = Op->getParent();
20182dd6ac3SSilviu Baranga
20282dd6ac3SSilviu Baranga // We don't need to do anything if we have already marked
20382dd6ac3SSilviu Baranga // this instruction as being dead.
20482dd6ac3SSilviu Baranga if (DeadInstr.find(Def) != DeadInstr.end())
20582dd6ac3SSilviu Baranga continue;
20682dd6ac3SSilviu Baranga
20782dd6ac3SSilviu Baranga // Check if all the uses of this instruction are marked as
20882dd6ac3SSilviu Baranga // dead. If so, we can also mark this instruction as being
20982dd6ac3SSilviu Baranga // dead.
21082dd6ac3SSilviu Baranga bool IsDead = true;
21137b22865SJaved Absar for (MachineOperand &MODef : Def->operands()) {
21282dd6ac3SSilviu Baranga if ((!MODef.isReg()) || (!MODef.isDef()))
21382dd6ac3SSilviu Baranga continue;
2140c476111SDaniel Sanders Register DefReg = MODef.getReg();
2152bea69bfSDaniel Sanders if (!Register::isVirtualRegister(DefReg)) {
21682dd6ac3SSilviu Baranga IsDead = false;
21782dd6ac3SSilviu Baranga break;
21882dd6ac3SSilviu Baranga }
21937b22865SJaved Absar for (MachineInstr &Use : MRI->use_instructions(Reg)) {
22082dd6ac3SSilviu Baranga // We don't care about self references.
22137b22865SJaved Absar if (&Use == Def)
22282dd6ac3SSilviu Baranga continue;
22337b22865SJaved Absar if (DeadInstr.find(&Use) == DeadInstr.end()) {
22482dd6ac3SSilviu Baranga IsDead = false;
22582dd6ac3SSilviu Baranga break;
22682dd6ac3SSilviu Baranga }
22782dd6ac3SSilviu Baranga }
22882dd6ac3SSilviu Baranga }
22982dd6ac3SSilviu Baranga
23082dd6ac3SSilviu Baranga if (!IsDead) continue;
23182dd6ac3SSilviu Baranga
232d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
23382dd6ac3SSilviu Baranga DeadInstr.insert(Def);
23482dd6ac3SSilviu Baranga }
23582dd6ac3SSilviu Baranga }
23682dd6ac3SSilviu Baranga }
23782dd6ac3SSilviu Baranga
23882dd6ac3SSilviu Baranga // Creates the more optimized patterns and generally does all the code
23982dd6ac3SSilviu Baranga // transformations in this pass.
optimizeSDPattern(MachineInstr * MI)24082dd6ac3SSilviu Baranga unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
24182dd6ac3SSilviu Baranga if (MI->isCopy()) {
24282dd6ac3SSilviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
24382dd6ac3SSilviu Baranga }
24482dd6ac3SSilviu Baranga
24582dd6ac3SSilviu Baranga if (MI->isInsertSubreg()) {
2460c476111SDaniel Sanders Register DPRReg = MI->getOperand(1).getReg();
2470c476111SDaniel Sanders Register SPRReg = MI->getOperand(2).getReg();
24882dd6ac3SSilviu Baranga
2492bea69bfSDaniel Sanders if (Register::isVirtualRegister(DPRReg) && Register::isVirtualRegister(SPRReg)) {
25082dd6ac3SSilviu Baranga MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
25182dd6ac3SSilviu Baranga MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
25282dd6ac3SSilviu Baranga
25382dd6ac3SSilviu Baranga if (DPRMI && SPRMI) {
25482dd6ac3SSilviu Baranga // See if the first operand of this insert_subreg is IMPLICIT_DEF
25582dd6ac3SSilviu Baranga MachineInstr *ECDef = elideCopies(DPRMI);
256062a2baeSCraig Topper if (ECDef && ECDef->isImplicitDef()) {
25782dd6ac3SSilviu Baranga // Another corner case - if we're inserting something that is purely
25882dd6ac3SSilviu Baranga // a subreg copy of a DPR, just use that DPR.
25982dd6ac3SSilviu Baranga
26082dd6ac3SSilviu Baranga MachineInstr *EC = elideCopies(SPRMI);
26182dd6ac3SSilviu Baranga // Is it a subreg copy of ssub_0?
26282dd6ac3SSilviu Baranga if (EC && EC->isCopy() &&
26382dd6ac3SSilviu Baranga EC->getOperand(1).getSubReg() == ARM::ssub_0) {
264d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
26582dd6ac3SSilviu Baranga
26682dd6ac3SSilviu Baranga // Find the thing we're subreg copying out of - is it of the same
26782dd6ac3SSilviu Baranga // regclass as DPRMI? (i.e. a DPR or QPR).
2680c476111SDaniel Sanders Register FullReg = SPRMI->getOperand(1).getReg();
26982dd6ac3SSilviu Baranga const TargetRegisterClass *TRC =
27082dd6ac3SSilviu Baranga MRI->getRegClass(MI->getOperand(1).getReg());
27182dd6ac3SSilviu Baranga if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
272d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Subreg copy is compatible - returning ");
273d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << printReg(FullReg) << "\n");
27482dd6ac3SSilviu Baranga eraseInstrWithNoUses(MI);
27582dd6ac3SSilviu Baranga return FullReg;
27682dd6ac3SSilviu Baranga }
27782dd6ac3SSilviu Baranga }
27882dd6ac3SSilviu Baranga
27982dd6ac3SSilviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
28082dd6ac3SSilviu Baranga }
28182dd6ac3SSilviu Baranga }
28282dd6ac3SSilviu Baranga }
28382dd6ac3SSilviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
28482dd6ac3SSilviu Baranga }
28582dd6ac3SSilviu Baranga
28682dd6ac3SSilviu Baranga if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
28782dd6ac3SSilviu Baranga &ARM::SPRRegClass)) {
28882dd6ac3SSilviu Baranga // See if all bar one of the operands are IMPLICIT_DEF and insert the
28982dd6ac3SSilviu Baranga // optimizer pattern accordingly.
29082dd6ac3SSilviu Baranga unsigned NumImplicit = 0, NumTotal = 0;
29182dd6ac3SSilviu Baranga unsigned NonImplicitReg = ~0U;
29282dd6ac3SSilviu Baranga
29382dd6ac3SSilviu Baranga for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
29482dd6ac3SSilviu Baranga if (!MI->getOperand(I).isReg())
29582dd6ac3SSilviu Baranga continue;
29682dd6ac3SSilviu Baranga ++NumTotal;
2970c476111SDaniel Sanders Register OpReg = MI->getOperand(I).getReg();
29882dd6ac3SSilviu Baranga
2992bea69bfSDaniel Sanders if (!Register::isVirtualRegister(OpReg))
30082dd6ac3SSilviu Baranga break;
30182dd6ac3SSilviu Baranga
30282dd6ac3SSilviu Baranga MachineInstr *Def = MRI->getVRegDef(OpReg);
30382dd6ac3SSilviu Baranga if (!Def)
30482dd6ac3SSilviu Baranga break;
30582dd6ac3SSilviu Baranga if (Def->isImplicitDef())
30682dd6ac3SSilviu Baranga ++NumImplicit;
30782dd6ac3SSilviu Baranga else
30882dd6ac3SSilviu Baranga NonImplicitReg = MI->getOperand(I).getReg();
30982dd6ac3SSilviu Baranga }
31082dd6ac3SSilviu Baranga
31182dd6ac3SSilviu Baranga if (NumImplicit == NumTotal - 1)
31282dd6ac3SSilviu Baranga return optimizeAllLanesPattern(MI, NonImplicitReg);
31382dd6ac3SSilviu Baranga else
31482dd6ac3SSilviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
31582dd6ac3SSilviu Baranga }
31682dd6ac3SSilviu Baranga
31735b2f757SCraig Topper llvm_unreachable("Unhandled update pattern!");
31882dd6ac3SSilviu Baranga }
31982dd6ac3SSilviu Baranga
32082dd6ac3SSilviu Baranga // Return true if this MachineInstr inserts a scalar (SPR) value into
32182dd6ac3SSilviu Baranga // a D or Q register.
hasPartialWrite(MachineInstr * MI)32282dd6ac3SSilviu Baranga bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
32382dd6ac3SSilviu Baranga // The only way we can do a partial register update is through a COPY,
32482dd6ac3SSilviu Baranga // INSERT_SUBREG or REG_SEQUENCE.
32582dd6ac3SSilviu Baranga if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
32682dd6ac3SSilviu Baranga return true;
32782dd6ac3SSilviu Baranga
32882dd6ac3SSilviu Baranga if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
32982dd6ac3SSilviu Baranga &ARM::SPRRegClass))
33082dd6ac3SSilviu Baranga return true;
33182dd6ac3SSilviu Baranga
33282dd6ac3SSilviu Baranga if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
33382dd6ac3SSilviu Baranga return true;
33482dd6ac3SSilviu Baranga
33582dd6ac3SSilviu Baranga return false;
33682dd6ac3SSilviu Baranga }
33782dd6ac3SSilviu Baranga
33882dd6ac3SSilviu Baranga // Looks through full copies to get the instruction that defines the input
33982dd6ac3SSilviu Baranga // operand for MI.
elideCopies(MachineInstr * MI)34082dd6ac3SSilviu Baranga MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
34182dd6ac3SSilviu Baranga if (!MI->isFullCopy())
34282dd6ac3SSilviu Baranga return MI;
3432bea69bfSDaniel Sanders if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
344062a2baeSCraig Topper return nullptr;
34582dd6ac3SSilviu Baranga MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
34682dd6ac3SSilviu Baranga if (!Def)
347062a2baeSCraig Topper return nullptr;
34882dd6ac3SSilviu Baranga return elideCopies(Def);
34982dd6ac3SSilviu Baranga }
35082dd6ac3SSilviu Baranga
35182dd6ac3SSilviu Baranga // Look through full copies and PHIs to get the set of non-copy MachineInstrs
35282dd6ac3SSilviu Baranga // that can produce MI.
elideCopiesAndPHIs(MachineInstr * MI,SmallVectorImpl<MachineInstr * > & Outs)35382dd6ac3SSilviu Baranga void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
35482dd6ac3SSilviu Baranga SmallVectorImpl<MachineInstr*> &Outs) {
35582dd6ac3SSilviu Baranga // Looking through PHIs may create loops so we need to track what
35682dd6ac3SSilviu Baranga // instructions we have visited before.
35782dd6ac3SSilviu Baranga std::set<MachineInstr *> Reached;
35882dd6ac3SSilviu Baranga SmallVector<MachineInstr *, 8> Front;
35982dd6ac3SSilviu Baranga Front.push_back(MI);
36082dd6ac3SSilviu Baranga while (Front.size() != 0) {
36116baad8fSKazu Hirata MI = Front.pop_back_val();
36282dd6ac3SSilviu Baranga
36382dd6ac3SSilviu Baranga // If we have already explored this MachineInstr, ignore it.
364*437f9600SKazu Hirata if (!Reached.insert(MI).second)
36582dd6ac3SSilviu Baranga continue;
36682dd6ac3SSilviu Baranga if (MI->isPHI()) {
36782dd6ac3SSilviu Baranga for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
3680c476111SDaniel Sanders Register Reg = MI->getOperand(I).getReg();
3692bea69bfSDaniel Sanders if (!Register::isVirtualRegister(Reg)) {
37082dd6ac3SSilviu Baranga continue;
37182dd6ac3SSilviu Baranga }
37282dd6ac3SSilviu Baranga MachineInstr *NewMI = MRI->getVRegDef(Reg);
37382dd6ac3SSilviu Baranga if (!NewMI)
37482dd6ac3SSilviu Baranga continue;
37582dd6ac3SSilviu Baranga Front.push_back(NewMI);
37682dd6ac3SSilviu Baranga }
37782dd6ac3SSilviu Baranga } else if (MI->isFullCopy()) {
3782bea69bfSDaniel Sanders if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
37982dd6ac3SSilviu Baranga continue;
38082dd6ac3SSilviu Baranga MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
38182dd6ac3SSilviu Baranga if (!NewMI)
38282dd6ac3SSilviu Baranga continue;
38382dd6ac3SSilviu Baranga Front.push_back(NewMI);
38482dd6ac3SSilviu Baranga } else {
385d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Found partial copy" << *MI << "\n");
38682dd6ac3SSilviu Baranga Outs.push_back(MI);
38782dd6ac3SSilviu Baranga }
38882dd6ac3SSilviu Baranga }
38982dd6ac3SSilviu Baranga }
39082dd6ac3SSilviu Baranga
39182dd6ac3SSilviu Baranga // Return the DPR virtual registers that are read by this machine instruction
39282dd6ac3SSilviu Baranga // (if any).
getReadDPRs(MachineInstr * MI)39382dd6ac3SSilviu Baranga SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
39482dd6ac3SSilviu Baranga if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
39582dd6ac3SSilviu Baranga MI->isKill())
39682dd6ac3SSilviu Baranga return SmallVector<unsigned, 8>();
39782dd6ac3SSilviu Baranga
39882dd6ac3SSilviu Baranga SmallVector<unsigned, 8> Defs;
39937b22865SJaved Absar for (MachineOperand &MO : MI->operands()) {
40082dd6ac3SSilviu Baranga if (!MO.isReg() || !MO.isUse())
40182dd6ac3SSilviu Baranga continue;
40282dd6ac3SSilviu Baranga if (!usesRegClass(MO, &ARM::DPRRegClass) &&
40340b5ab8eSHao Liu !usesRegClass(MO, &ARM::QPRRegClass) &&
40440b5ab8eSHao Liu !usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
40582dd6ac3SSilviu Baranga continue;
40682dd6ac3SSilviu Baranga
40782dd6ac3SSilviu Baranga Defs.push_back(MO.getReg());
40882dd6ac3SSilviu Baranga }
40982dd6ac3SSilviu Baranga return Defs;
41082dd6ac3SSilviu Baranga }
41182dd6ac3SSilviu Baranga
41282dd6ac3SSilviu Baranga // Creates a DPR register from an SPR one by using a VDUP.
createDupLane(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg,unsigned Lane,bool QPR)413bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
41482dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
415bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned Reg,
416bdc4956bSBenjamin Kramer unsigned Lane, bool QPR) {
4170c476111SDaniel Sanders Register Out =
4180c476111SDaniel Sanders MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass);
4194f8c3e18SDiana Picus BuildMI(MBB, InsertBefore, DL,
4204f8c3e18SDiana Picus TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
42182dd6ac3SSilviu Baranga .addReg(Reg)
4224f8c3e18SDiana Picus .addImm(Lane)
4234f8c3e18SDiana Picus .add(predOps(ARMCC::AL));
42482dd6ac3SSilviu Baranga
42582dd6ac3SSilviu Baranga return Out;
42682dd6ac3SSilviu Baranga }
42782dd6ac3SSilviu Baranga
42882dd6ac3SSilviu Baranga // Creates a SPR register from a DPR by copying the value in lane 0.
createExtractSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,const TargetRegisterClass * TRC)429bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createExtractSubreg(
430bdc4956bSBenjamin Kramer MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
431bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned DReg, unsigned Lane,
43282dd6ac3SSilviu Baranga const TargetRegisterClass *TRC) {
4330c476111SDaniel Sanders Register Out = MRI->createVirtualRegister(TRC);
43482dd6ac3SSilviu Baranga BuildMI(MBB,
43582dd6ac3SSilviu Baranga InsertBefore,
43682dd6ac3SSilviu Baranga DL,
43782dd6ac3SSilviu Baranga TII->get(TargetOpcode::COPY), Out)
43882dd6ac3SSilviu Baranga .addReg(DReg, 0, Lane);
43982dd6ac3SSilviu Baranga
44082dd6ac3SSilviu Baranga return Out;
44182dd6ac3SSilviu Baranga }
44282dd6ac3SSilviu Baranga
44382dd6ac3SSilviu Baranga // Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
createRegSequence(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Reg1,unsigned Reg2)444bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createRegSequence(
445bdc4956bSBenjamin Kramer MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
446bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
4470c476111SDaniel Sanders Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
44882dd6ac3SSilviu Baranga BuildMI(MBB,
44982dd6ac3SSilviu Baranga InsertBefore,
45082dd6ac3SSilviu Baranga DL,
45182dd6ac3SSilviu Baranga TII->get(TargetOpcode::REG_SEQUENCE), Out)
45282dd6ac3SSilviu Baranga .addReg(Reg1)
45382dd6ac3SSilviu Baranga .addImm(ARM::dsub_0)
45482dd6ac3SSilviu Baranga .addReg(Reg2)
45582dd6ac3SSilviu Baranga .addImm(ARM::dsub_1);
45682dd6ac3SSilviu Baranga return Out;
45782dd6ac3SSilviu Baranga }
45882dd6ac3SSilviu Baranga
45982dd6ac3SSilviu Baranga // Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
46082dd6ac3SSilviu Baranga // and merges them into one DPR register.
createVExt(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned Ssub0,unsigned Ssub1)461bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
46282dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
463bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned Ssub0,
464bdc4956bSBenjamin Kramer unsigned Ssub1) {
4650c476111SDaniel Sanders Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
4664f8c3e18SDiana Picus BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
46782dd6ac3SSilviu Baranga .addReg(Ssub0)
46882dd6ac3SSilviu Baranga .addReg(Ssub1)
4694f8c3e18SDiana Picus .addImm(1)
4704f8c3e18SDiana Picus .add(predOps(ARMCC::AL));
47182dd6ac3SSilviu Baranga return Out;
47282dd6ac3SSilviu Baranga }
47382dd6ac3SSilviu Baranga
createInsertSubreg(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL,unsigned DReg,unsigned Lane,unsigned ToInsert)474bdc4956bSBenjamin Kramer unsigned A15SDOptimizer::createInsertSubreg(
475bdc4956bSBenjamin Kramer MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
476bdc4956bSBenjamin Kramer const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
4770c476111SDaniel Sanders Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
47882dd6ac3SSilviu Baranga BuildMI(MBB,
47982dd6ac3SSilviu Baranga InsertBefore,
48082dd6ac3SSilviu Baranga DL,
48182dd6ac3SSilviu Baranga TII->get(TargetOpcode::INSERT_SUBREG), Out)
48282dd6ac3SSilviu Baranga .addReg(DReg)
48382dd6ac3SSilviu Baranga .addReg(ToInsert)
48482dd6ac3SSilviu Baranga .addImm(Lane);
48582dd6ac3SSilviu Baranga
48682dd6ac3SSilviu Baranga return Out;
48782dd6ac3SSilviu Baranga }
48882dd6ac3SSilviu Baranga
48982dd6ac3SSilviu Baranga unsigned
createImplicitDef(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertBefore,const DebugLoc & DL)49082dd6ac3SSilviu Baranga A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
49182dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertBefore,
492bdc4956bSBenjamin Kramer const DebugLoc &DL) {
4930c476111SDaniel Sanders Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
49482dd6ac3SSilviu Baranga BuildMI(MBB,
49582dd6ac3SSilviu Baranga InsertBefore,
49682dd6ac3SSilviu Baranga DL,
49782dd6ac3SSilviu Baranga TII->get(TargetOpcode::IMPLICIT_DEF), Out);
49882dd6ac3SSilviu Baranga return Out;
49982dd6ac3SSilviu Baranga }
50082dd6ac3SSilviu Baranga
50182dd6ac3SSilviu Baranga // This function inserts instructions in order to optimize interactions between
50282dd6ac3SSilviu Baranga // SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
50382dd6ac3SSilviu Baranga // lanes, and the using VEXT instructions to recompose the result.
50482dd6ac3SSilviu Baranga unsigned
optimizeAllLanesPattern(MachineInstr * MI,unsigned Reg)50582dd6ac3SSilviu Baranga A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
50682dd6ac3SSilviu Baranga MachineBasicBlock::iterator InsertPt(MI);
50782dd6ac3SSilviu Baranga DebugLoc DL = MI->getDebugLoc();
50882dd6ac3SSilviu Baranga MachineBasicBlock &MBB = *MI->getParent();
50982dd6ac3SSilviu Baranga InsertPt++;
51082dd6ac3SSilviu Baranga unsigned Out;
51182dd6ac3SSilviu Baranga
51240b5ab8eSHao Liu // DPair has the same length as QPR and also has two DPRs as subreg.
51340b5ab8eSHao Liu // Treat DPair as QPR.
51440b5ab8eSHao Liu if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
51540b5ab8eSHao Liu MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
51682dd6ac3SSilviu Baranga unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
51782dd6ac3SSilviu Baranga ARM::dsub_0, &ARM::DPRRegClass);
51882dd6ac3SSilviu Baranga unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
51982dd6ac3SSilviu Baranga ARM::dsub_1, &ARM::DPRRegClass);
52082dd6ac3SSilviu Baranga
52182dd6ac3SSilviu Baranga unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
52282dd6ac3SSilviu Baranga unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
52382dd6ac3SSilviu Baranga Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
52482dd6ac3SSilviu Baranga
52582dd6ac3SSilviu Baranga unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
52682dd6ac3SSilviu Baranga unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
52782dd6ac3SSilviu Baranga Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
52882dd6ac3SSilviu Baranga
52982dd6ac3SSilviu Baranga Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
53082dd6ac3SSilviu Baranga
53182dd6ac3SSilviu Baranga } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
53282dd6ac3SSilviu Baranga unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
53382dd6ac3SSilviu Baranga unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
53482dd6ac3SSilviu Baranga Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
53582dd6ac3SSilviu Baranga
53682dd6ac3SSilviu Baranga } else {
53782dd6ac3SSilviu Baranga assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
53882dd6ac3SSilviu Baranga "Found unexpected regclass!");
53982dd6ac3SSilviu Baranga
54082dd6ac3SSilviu Baranga unsigned PrefLane = getPrefSPRLane(Reg);
54182dd6ac3SSilviu Baranga unsigned Lane;
54282dd6ac3SSilviu Baranga switch (PrefLane) {
54382dd6ac3SSilviu Baranga case ARM::ssub_0: Lane = 0; break;
54482dd6ac3SSilviu Baranga case ARM::ssub_1: Lane = 1; break;
54582dd6ac3SSilviu Baranga default: llvm_unreachable("Unknown preferred lane!");
54682dd6ac3SSilviu Baranga }
54782dd6ac3SSilviu Baranga
54840b5ab8eSHao Liu // Treat DPair as QPR
54940b5ab8eSHao Liu bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
55040b5ab8eSHao Liu usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
55182dd6ac3SSilviu Baranga
55282dd6ac3SSilviu Baranga Out = createImplicitDef(MBB, InsertPt, DL);
55382dd6ac3SSilviu Baranga Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
55482dd6ac3SSilviu Baranga Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
55582dd6ac3SSilviu Baranga eraseInstrWithNoUses(MI);
55682dd6ac3SSilviu Baranga }
55782dd6ac3SSilviu Baranga return Out;
55882dd6ac3SSilviu Baranga }
55982dd6ac3SSilviu Baranga
runOnInstruction(MachineInstr * MI)56082dd6ac3SSilviu Baranga bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
56182dd6ac3SSilviu Baranga // We look for instructions that write S registers that are then read as
56282dd6ac3SSilviu Baranga // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
56382dd6ac3SSilviu Baranga // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
56482dd6ac3SSilviu Baranga // merge two SPR values to form a DPR register. In order avoid false
56582dd6ac3SSilviu Baranga // positives we make sure that there is an SPR producer so we look past
56682dd6ac3SSilviu Baranga // COPY and PHI nodes to find it.
56782dd6ac3SSilviu Baranga //
56882dd6ac3SSilviu Baranga // The best code pattern for when an SPR producer is going to be used by a
56982dd6ac3SSilviu Baranga // DPR or QPR consumer depends on whether the other lanes of the
57082dd6ac3SSilviu Baranga // corresponding DPR/QPR are currently defined.
57182dd6ac3SSilviu Baranga //
57282dd6ac3SSilviu Baranga // We can handle these efficiently, depending on the type of
57382dd6ac3SSilviu Baranga // pseudo-instruction that is producing the pattern
57482dd6ac3SSilviu Baranga //
57582dd6ac3SSilviu Baranga // * COPY: * VDUP all lanes and merge the results together
57682dd6ac3SSilviu Baranga // using VEXTs.
57782dd6ac3SSilviu Baranga //
57882dd6ac3SSilviu Baranga // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
57982dd6ac3SSilviu Baranga // lane, and the other lane(s) of the DPR/QPR register
58082dd6ac3SSilviu Baranga // that we are inserting in are undefined, use the
58182dd6ac3SSilviu Baranga // original DPR/QPR value.
58282dd6ac3SSilviu Baranga // * Otherwise, fall back on the same stategy as COPY.
58382dd6ac3SSilviu Baranga //
58482dd6ac3SSilviu Baranga // * REG_SEQUENCE: * If all except one of the input operands are
58582dd6ac3SSilviu Baranga // IMPLICIT_DEFs, insert the VDUP pattern for just the
58682dd6ac3SSilviu Baranga // defined input operand
58782dd6ac3SSilviu Baranga // * Otherwise, fall back on the same stategy as COPY.
58882dd6ac3SSilviu Baranga //
58982dd6ac3SSilviu Baranga
59082dd6ac3SSilviu Baranga // First, get all the reads of D-registers done by this instruction.
59182dd6ac3SSilviu Baranga SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
59282dd6ac3SSilviu Baranga bool Modified = false;
59382dd6ac3SSilviu Baranga
594c5cf7d91SKazu Hirata for (unsigned I : Defs) {
59582dd6ac3SSilviu Baranga // Follow the def-use chain for this DPR through COPYs, and also through
59682dd6ac3SSilviu Baranga // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
59782dd6ac3SSilviu Baranga // we can end up with multiple defs of this DPR.
59882dd6ac3SSilviu Baranga
59982dd6ac3SSilviu Baranga SmallVector<MachineInstr *, 8> DefSrcs;
600c5cf7d91SKazu Hirata if (!Register::isVirtualRegister(I))
60182dd6ac3SSilviu Baranga continue;
602c5cf7d91SKazu Hirata MachineInstr *Def = MRI->getVRegDef(I);
60382dd6ac3SSilviu Baranga if (!Def)
60482dd6ac3SSilviu Baranga continue;
60582dd6ac3SSilviu Baranga
60682dd6ac3SSilviu Baranga elideCopiesAndPHIs(Def, DefSrcs);
60782dd6ac3SSilviu Baranga
60837b22865SJaved Absar for (MachineInstr *MI : DefSrcs) {
60982dd6ac3SSilviu Baranga // If we've already analyzed and replaced this operand, don't do
61082dd6ac3SSilviu Baranga // anything.
61182dd6ac3SSilviu Baranga if (Replacements.find(MI) != Replacements.end())
61282dd6ac3SSilviu Baranga continue;
61382dd6ac3SSilviu Baranga
61482dd6ac3SSilviu Baranga // Now, work out if the instruction causes a SPR->DPR dependency.
61582dd6ac3SSilviu Baranga if (!hasPartialWrite(MI))
61682dd6ac3SSilviu Baranga continue;
61782dd6ac3SSilviu Baranga
61882dd6ac3SSilviu Baranga // Collect all the uses of this MI's DPR def for updating later.
61982dd6ac3SSilviu Baranga SmallVector<MachineOperand*, 8> Uses;
6200c476111SDaniel Sanders Register DPRDefReg = MI->getOperand(0).getReg();
6212ca45adfSKazu Hirata for (MachineOperand &MO : MRI->use_operands(DPRDefReg))
6222ca45adfSKazu Hirata Uses.push_back(&MO);
62382dd6ac3SSilviu Baranga
62482dd6ac3SSilviu Baranga // We can optimize this.
62582dd6ac3SSilviu Baranga unsigned NewReg = optimizeSDPattern(MI);
62682dd6ac3SSilviu Baranga
62782dd6ac3SSilviu Baranga if (NewReg != 0) {
62882dd6ac3SSilviu Baranga Modified = true;
629c5cf7d91SKazu Hirata for (MachineOperand *Use : Uses) {
63013654dd3SJim Grosbach // Make sure to constrain the register class of the new register to
63113654dd3SJim Grosbach // match what we're replacing. Otherwise we can optimize a DPR_VFP2
63213654dd3SJim Grosbach // reference into a plain DPR, and that will end poorly. NewReg is
63313654dd3SJim Grosbach // always virtual here, so there will always be a matching subclass
63413654dd3SJim Grosbach // to find.
635c5cf7d91SKazu Hirata MRI->constrainRegClass(NewReg, MRI->getRegClass(Use->getReg()));
63613654dd3SJim Grosbach
637c5cf7d91SKazu Hirata LLVM_DEBUG(dbgs() << "Replacing operand " << *Use << " with "
6389d419d3bSFrancis Visoiu Mistrih << printReg(NewReg) << "\n");
639c5cf7d91SKazu Hirata Use->substVirtReg(NewReg, 0, *TRI);
64082dd6ac3SSilviu Baranga }
64182dd6ac3SSilviu Baranga }
64282dd6ac3SSilviu Baranga Replacements[MI] = NewReg;
64382dd6ac3SSilviu Baranga }
64482dd6ac3SSilviu Baranga }
64582dd6ac3SSilviu Baranga return Modified;
64682dd6ac3SSilviu Baranga }
64782dd6ac3SSilviu Baranga
runOnMachineFunction(MachineFunction & Fn)64882dd6ac3SSilviu Baranga bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
649f1caa283SMatthias Braun if (skipFunction(Fn.getFunction()))
650a2b9111eSAndrew Kaylor return false;
651a2b9111eSAndrew Kaylor
65263b44882SEric Christopher const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
65363b44882SEric Christopher // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
65463b44882SEric Christopher // enabled when NEON is available.
655fffa9b58SEvandro Menezes if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
65663b44882SEric Christopher return false;
657fffa9b58SEvandro Menezes
65863b44882SEric Christopher TII = STI.getInstrInfo();
65963b44882SEric Christopher TRI = STI.getRegisterInfo();
66082dd6ac3SSilviu Baranga MRI = &Fn.getRegInfo();
66182dd6ac3SSilviu Baranga bool Modified = false;
66282dd6ac3SSilviu Baranga
663d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
66482dd6ac3SSilviu Baranga
66582dd6ac3SSilviu Baranga DeadInstr.clear();
66682dd6ac3SSilviu Baranga Replacements.clear();
66782dd6ac3SSilviu Baranga
66837b22865SJaved Absar for (MachineBasicBlock &MBB : Fn) {
66937b22865SJaved Absar for (MachineInstr &MI : MBB) {
67037b22865SJaved Absar Modified |= runOnInstruction(&MI);
67137b22865SJaved Absar }
67282dd6ac3SSilviu Baranga }
67382dd6ac3SSilviu Baranga
67437b22865SJaved Absar for (MachineInstr *MI : DeadInstr) {
67537b22865SJaved Absar MI->eraseFromParent();
67682dd6ac3SSilviu Baranga }
67782dd6ac3SSilviu Baranga
67882dd6ac3SSilviu Baranga return Modified;
67982dd6ac3SSilviu Baranga }
68082dd6ac3SSilviu Baranga
createA15SDOptimizerPass()68182dd6ac3SSilviu Baranga FunctionPass *llvm::createA15SDOptimizerPass() {
68282dd6ac3SSilviu Baranga return new A15SDOptimizer();
68382dd6ac3SSilviu Baranga }
684