13ca95b02SDimitry Andric //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
23ca95b02SDimitry Andric //
33ca95b02SDimitry Andric //                     The LLVM Compiler Infrastructure
43ca95b02SDimitry Andric //
53ca95b02SDimitry Andric // This file is distributed under the University of Illinois Open Source
63ca95b02SDimitry Andric // License. See LICENSE.TXT for details.
73ca95b02SDimitry Andric //
83ca95b02SDimitry Andric //===----------------------------------------------------------------------===//
93ca95b02SDimitry Andric //
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat, we can remove the splat
// (i.e. replace the load with a load-and-splat pseudo instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
163ca95b02SDimitry Andric //
173ca95b02SDimitry Andric //===----------------------------------------------------------------------===//
183ca95b02SDimitry Andric 
193ca95b02SDimitry Andric #include "PPC.h"
203ca95b02SDimitry Andric #include "PPCInstrBuilder.h"
213ca95b02SDimitry Andric #include "PPCInstrInfo.h"
223ca95b02SDimitry Andric #include "llvm/ADT/SmallVector.h"
233ca95b02SDimitry Andric #include "llvm/ADT/Statistic.h"
243ca95b02SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
25*2cab237bSDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
263ca95b02SDimitry Andric #include "llvm/Support/MathExtras.h"
273ca95b02SDimitry Andric #include "llvm/Target/TargetMachine.h"
283ca95b02SDimitry Andric using namespace llvm;
293ca95b02SDimitry Andric 
303ca95b02SDimitry Andric #define DEBUG_TYPE "ppc-qpx-load-splat"
313ca95b02SDimitry Andric 
323ca95b02SDimitry Andric STATISTIC(NumSimplified, "Number of QPX load splats simplified");
333ca95b02SDimitry Andric 
namespace llvm {
  // Declaration of the registration function that the PPCQPXLoadSplat
  // constructor calls with the global PassRegistry.
  // NOTE(review): the definition is presumably produced by the
  // INITIALIZE_PASS macro used further down in this file -- confirm.
  void initializePPCQPXLoadSplatPass(PassRegistry&);
}
373ca95b02SDimitry Andric 
383ca95b02SDimitry Andric namespace {
393ca95b02SDimitry Andric   struct PPCQPXLoadSplat : public MachineFunctionPass {
403ca95b02SDimitry Andric     static char ID;
PPCQPXLoadSplat__anon43ae9fda0111::PPCQPXLoadSplat413ca95b02SDimitry Andric     PPCQPXLoadSplat() : MachineFunctionPass(ID) {
423ca95b02SDimitry Andric       initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
433ca95b02SDimitry Andric     }
443ca95b02SDimitry Andric 
453ca95b02SDimitry Andric     bool runOnMachineFunction(MachineFunction &Fn) override;
463ca95b02SDimitry Andric 
getPassName__anon43ae9fda0111::PPCQPXLoadSplat47d88c1a5aSDimitry Andric     StringRef getPassName() const override {
483ca95b02SDimitry Andric       return "PowerPC QPX Load Splat Simplification";
493ca95b02SDimitry Andric     }
503ca95b02SDimitry Andric   };
513ca95b02SDimitry Andric   char PPCQPXLoadSplat::ID = 0;
523ca95b02SDimitry Andric }
533ca95b02SDimitry Andric 
// Register PPCQPXLoadSplat under the argument name "ppc-qpx-load-splat" with
// the descriptive name "PowerPC QPX Load Splat Simplification".
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// the macro's definition.
INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification",
                false, false)
573ca95b02SDimitry Andric 
createPPCQPXLoadSplatPass()583ca95b02SDimitry Andric FunctionPass *llvm::createPPCQPXLoadSplatPass() {
593ca95b02SDimitry Andric   return new PPCQPXLoadSplat();
603ca95b02SDimitry Andric }
613ca95b02SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)623ca95b02SDimitry Andric bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
63*2cab237bSDimitry Andric   if (skipFunction(MF.getFunction()))
643ca95b02SDimitry Andric     return false;
653ca95b02SDimitry Andric 
663ca95b02SDimitry Andric   bool MadeChange = false;
673ca95b02SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
683ca95b02SDimitry Andric 
693ca95b02SDimitry Andric   for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
703ca95b02SDimitry Andric     MachineBasicBlock *MBB = &*MFI;
713ca95b02SDimitry Andric     SmallVector<MachineInstr *, 4> Splats;
723ca95b02SDimitry Andric 
733ca95b02SDimitry Andric     for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
743ca95b02SDimitry Andric       MachineInstr *MI = &*MBBI;
753ca95b02SDimitry Andric 
763ca95b02SDimitry Andric       if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
773ca95b02SDimitry Andric         Splats.clear();
783ca95b02SDimitry Andric         continue;
793ca95b02SDimitry Andric       }
803ca95b02SDimitry Andric 
813ca95b02SDimitry Andric       // We're looking for a sequence like this:
82*2cab237bSDimitry Andric       // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
83*2cab237bSDimitry Andric       // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
843ca95b02SDimitry Andric 
853ca95b02SDimitry Andric       for (auto SI = Splats.begin(); SI != Splats.end();) {
863ca95b02SDimitry Andric         MachineInstr *SMI = *SI;
873ca95b02SDimitry Andric         unsigned SplatReg = SMI->getOperand(0).getReg();
883ca95b02SDimitry Andric         unsigned SrcReg = SMI->getOperand(1).getReg();
893ca95b02SDimitry Andric 
903ca95b02SDimitry Andric         if (MI->modifiesRegister(SrcReg, TRI)) {
913ca95b02SDimitry Andric           switch (MI->getOpcode()) {
923ca95b02SDimitry Andric           default:
933ca95b02SDimitry Andric             SI = Splats.erase(SI);
943ca95b02SDimitry Andric             continue;
953ca95b02SDimitry Andric           case PPC::LFS:
963ca95b02SDimitry Andric           case PPC::LFD:
973ca95b02SDimitry Andric           case PPC::LFSU:
983ca95b02SDimitry Andric           case PPC::LFDU:
993ca95b02SDimitry Andric           case PPC::LFSUX:
1003ca95b02SDimitry Andric           case PPC::LFDUX:
1013ca95b02SDimitry Andric           case PPC::LFSX:
1023ca95b02SDimitry Andric           case PPC::LFDX:
1033ca95b02SDimitry Andric           case PPC::LFIWAX:
1043ca95b02SDimitry Andric           case PPC::LFIWZX:
1053ca95b02SDimitry Andric             if (SplatReg != SrcReg) {
1063ca95b02SDimitry Andric               // We need to change the load to define the scalar subregister of
1073ca95b02SDimitry Andric               // the QPX splat source register.
1083ca95b02SDimitry Andric               unsigned SubRegIndex =
1093ca95b02SDimitry Andric                 TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
1103ca95b02SDimitry Andric               unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
1113ca95b02SDimitry Andric 
1123ca95b02SDimitry Andric               // Substitute both the explicit defined register, and also the
1133ca95b02SDimitry Andric               // implicit def of the containing QPX register.
1143ca95b02SDimitry Andric               MI->getOperand(0).setReg(SplatSubReg);
1153ca95b02SDimitry Andric               MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
1163ca95b02SDimitry Andric             }
1173ca95b02SDimitry Andric 
1183ca95b02SDimitry Andric             SI = Splats.erase(SI);
1193ca95b02SDimitry Andric 
1203ca95b02SDimitry Andric             // If SMI is directly after MI, then MBBI's base iterator is
1213ca95b02SDimitry Andric             // pointing at SMI.  Adjust MBBI around the call to erase SMI to
1223ca95b02SDimitry Andric             // avoid invalidating MBBI.
1233ca95b02SDimitry Andric             ++MBBI;
1243ca95b02SDimitry Andric             SMI->eraseFromParent();
1253ca95b02SDimitry Andric             --MBBI;
1263ca95b02SDimitry Andric 
1273ca95b02SDimitry Andric             ++NumSimplified;
1283ca95b02SDimitry Andric             MadeChange = true;
1293ca95b02SDimitry Andric             continue;
1303ca95b02SDimitry Andric           }
1313ca95b02SDimitry Andric         }
1323ca95b02SDimitry Andric 
1333ca95b02SDimitry Andric         // If this instruction defines the splat register, then we cannot move
1343ca95b02SDimitry Andric         // the previous definition above it. If it reads from the splat
1353ca95b02SDimitry Andric         // register, then it must already be alive from some previous
1363ca95b02SDimitry Andric         // definition, and if the splat register is different from the source
1373ca95b02SDimitry Andric         // register, then this definition must not be the load for which we're
1383ca95b02SDimitry Andric         // searching.
1393ca95b02SDimitry Andric         if (MI->modifiesRegister(SplatReg, TRI) ||
1403ca95b02SDimitry Andric             (SrcReg != SplatReg &&
1413ca95b02SDimitry Andric              MI->readsRegister(SplatReg, TRI))) {
1423ca95b02SDimitry Andric           SI = Splats.erase(SI);
1433ca95b02SDimitry Andric           continue;
1443ca95b02SDimitry Andric         }
1453ca95b02SDimitry Andric 
1463ca95b02SDimitry Andric         ++SI;
1473ca95b02SDimitry Andric       }
1483ca95b02SDimitry Andric 
1493ca95b02SDimitry Andric       if (MI->getOpcode() != PPC::QVESPLATI &&
1503ca95b02SDimitry Andric           MI->getOpcode() != PPC::QVESPLATIs &&
1513ca95b02SDimitry Andric           MI->getOpcode() != PPC::QVESPLATIb)
1523ca95b02SDimitry Andric         continue;
1533ca95b02SDimitry Andric       if (MI->getOperand(2).getImm() != 0)
1543ca95b02SDimitry Andric         continue;
1553ca95b02SDimitry Andric 
1563ca95b02SDimitry Andric       // If there are other uses of the scalar value after this, replacing
1573ca95b02SDimitry Andric       // those uses might be non-trivial.
1583ca95b02SDimitry Andric       if (!MI->getOperand(1).isKill())
1593ca95b02SDimitry Andric         continue;
1603ca95b02SDimitry Andric 
1613ca95b02SDimitry Andric       Splats.push_back(MI);
1623ca95b02SDimitry Andric     }
1633ca95b02SDimitry Andric   }
1643ca95b02SDimitry Andric 
1653ca95b02SDimitry Andric   return MadeChange;
1663ca95b02SDimitry Andric }
167