1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // A pre-emit peephole for catching opportunities introduced by late passes such
10 // as MachineBlockPlacement.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPC.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCSubtarget.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "ppc-pre-emit-peephole"
31 
32 STATISTIC(NumRRConvertedInPreEmit,
33           "Number of r+r instructions converted to r+i in pre-emit peephole");
34 STATISTIC(NumRemovedInPreEmit,
35           "Number of instructions deleted in pre-emit peephole");
36 STATISTIC(NumberOfSelfCopies,
37           "Number of self copy instructions eliminated");
38 STATISTIC(NumFrameOffFoldInPreEmit,
39           "Number of folding frame offset by using r+r in pre-emit peephole");
40 
41 static cl::opt<bool>
42 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
43                    cl::desc("Run pre-emit peephole optimizations."));
44 
45 namespace {
46 
47 static bool hasPCRelativeForm(MachineInstr &Use) {
48   switch (Use.getOpcode()) {
49   default:
50     return false;
51   case PPC::LBZ:
52   case PPC::LBZ8:
53   case PPC::LHA:
54   case PPC::LHA8:
55   case PPC::LHZ:
56   case PPC::LHZ8:
57   case PPC::LWZ:
58   case PPC::LWZ8:
59   case PPC::STB:
60   case PPC::STB8:
61   case PPC::STH:
62   case PPC::STH8:
63   case PPC::STW:
64   case PPC::STW8:
65   case PPC::LD:
66   case PPC::STD:
67   case PPC::LWA:
68   case PPC::LXSD:
69   case PPC::LXSSP:
70   case PPC::LXV:
71   case PPC::STXSD:
72   case PPC::STXSSP:
73   case PPC::STXV:
74   case PPC::LFD:
75   case PPC::LFS:
76   case PPC::STFD:
77   case PPC::STFS:
78   case PPC::DFLOADf32:
79   case PPC::DFLOADf64:
80   case PPC::DFSTOREf32:
81   case PPC::DFSTOREf64:
82     return true;
83   }
84 }
85 
86   class PPCPreEmitPeephole : public MachineFunctionPass {
87   public:
88     static char ID;
89     PPCPreEmitPeephole() : MachineFunctionPass(ID) {
90       initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
91     }
92 
93     void getAnalysisUsage(AnalysisUsage &AU) const override {
94       MachineFunctionPass::getAnalysisUsage(AU);
95     }
96 
97     MachineFunctionProperties getRequiredProperties() const override {
98       return MachineFunctionProperties().set(
99           MachineFunctionProperties::Property::NoVRegs);
100     }
101 
102     // This function removes any redundant load immediates. It has two level
103     // loops - The outer loop finds the load immediates BBI that could be used
104     // to replace following redundancy. The inner loop scans instructions that
105     // after BBI to find redundancy and update kill/dead flags accordingly. If
106     // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
107     // that modify the def register of BBI would break the scanning.
108     // DeadOrKillToUnset is a pointer to the previous operand that had the
109     // kill/dead flag set. It keeps track of the def register of BBI, the use
110     // registers of AfterBBIs and the def registers of AfterBBIs.
111     bool removeRedundantLIs(MachineBasicBlock &MBB,
112                             const TargetRegisterInfo *TRI) {
113       LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
114                  MBB.dump(); dbgs() << "\n");
115 
116       DenseSet<MachineInstr *> InstrsToErase;
117       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
118         // Skip load immediate that is marked to be erased later because it
119         // cannot be used to replace any other instructions.
120         if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
121           continue;
122         // Skip non-load immediate.
123         unsigned Opc = BBI->getOpcode();
124         if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
125             Opc != PPC::LIS8)
126           continue;
127         // Skip load immediate, where the operand is a relocation (e.g., $r3 =
128         // LI target-flags(ppc-lo) %const.0).
129         if (!BBI->getOperand(1).isImm())
130           continue;
131         assert(BBI->getOperand(0).isReg() &&
132                "Expected a register for the first operand");
133 
134         LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
135 
136         Register Reg = BBI->getOperand(0).getReg();
137         int64_t Imm = BBI->getOperand(1).getImm();
138         MachineOperand *DeadOrKillToUnset = nullptr;
139         if (BBI->getOperand(0).isDead()) {
140           DeadOrKillToUnset = &BBI->getOperand(0);
141           LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
142                             << " from load immediate " << *BBI
143                             << " is a unsetting candidate\n");
144         }
145         // This loop scans instructions after BBI to see if there is any
146         // redundant load immediate.
147         for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
148              ++AfterBBI) {
149           // Track the operand that kill Reg. We would unset the kill flag of
150           // the operand if there is a following redundant load immediate.
151           int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
152 
153           // We can't just clear implicit kills, so if we encounter one, stop
154           // looking further.
155           if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
156             LLVM_DEBUG(dbgs()
157                        << "Encountered an implicit kill, cannot proceed: ");
158             LLVM_DEBUG(AfterBBI->dump());
159             break;
160           }
161 
162           if (KillIdx != -1) {
163             assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
164             DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
165             LLVM_DEBUG(dbgs()
166                        << " Kill flag of " << *DeadOrKillToUnset << " from "
167                        << *AfterBBI << " is a unsetting candidate\n");
168           }
169 
170           if (!AfterBBI->modifiesRegister(Reg, TRI))
171             continue;
172           // Finish scanning because Reg is overwritten by a non-load
173           // instruction.
174           if (AfterBBI->getOpcode() != Opc)
175             break;
176           assert(AfterBBI->getOperand(0).isReg() &&
177                  "Expected a register for the first operand");
178           // Finish scanning because Reg is overwritten by a relocation or a
179           // different value.
180           if (!AfterBBI->getOperand(1).isImm() ||
181               AfterBBI->getOperand(1).getImm() != Imm)
182             break;
183 
184           // It loads same immediate value to the same Reg, which is redundant.
185           // We would unset kill flag in previous Reg usage to extend live range
186           // of Reg first, then remove the redundancy.
187           if (DeadOrKillToUnset) {
188             LLVM_DEBUG(dbgs()
189                        << " Unset dead/kill flag of " << *DeadOrKillToUnset
190                        << " from " << *DeadOrKillToUnset->getParent());
191             if (DeadOrKillToUnset->isDef())
192               DeadOrKillToUnset->setIsDead(false);
193             else
194               DeadOrKillToUnset->setIsKill(false);
195           }
196           DeadOrKillToUnset =
197               AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
198           if (DeadOrKillToUnset)
199             LLVM_DEBUG(dbgs()
200                        << " Dead flag of " << *DeadOrKillToUnset << " from "
201                        << *AfterBBI << " is a unsetting candidate\n");
202           InstrsToErase.insert(&*AfterBBI);
203           LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
204                      AfterBBI->dump());
205         }
206       }
207 
208       for (MachineInstr *MI : InstrsToErase) {
209         MI->eraseFromParent();
210       }
211       NumRemovedInPreEmit += InstrsToErase.size();
212       return !InstrsToErase.empty();
213     }
214 
215     // Check if this instruction is a PLDpc that is part of a GOT indirect
216     // access.
217     bool isGOTPLDpc(MachineInstr &Instr) {
218       if (Instr.getOpcode() != PPC::PLDpc)
219         return false;
220 
221       // The result must be a register.
222       const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
223       if (!LoadedAddressReg.isReg())
224         return false;
225 
226       // Make sure that this is a global symbol.
227       const MachineOperand &SymbolOp = Instr.getOperand(1);
228       if (!SymbolOp.isGlobal())
229         return false;
230 
231       // Finally return true only if the GOT flag is present.
232       return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
233     }
234 
235     bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
236       MachineFunction *MF = MBB.getParent();
237       // Add this linker opt only if we are using PC Relative memops.
238       if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
239         return false;
240 
241       // Struct to keep track of one def/use pair for a GOT indirect access.
242       struct GOTDefUsePair {
243         MachineBasicBlock::iterator DefInst;
244         MachineBasicBlock::iterator UseInst;
245         Register DefReg;
246         Register UseReg;
247         bool StillValid;
248       };
249       // Vector of def/ues pairs in this basic block.
250       SmallVector<GOTDefUsePair, 4> CandPairs;
251       SmallVector<GOTDefUsePair, 4> ValidPairs;
252       bool MadeChange = false;
253 
254       // Run through all of the instructions in the basic block and try to
255       // collect potential pairs of GOT indirect access instructions.
256       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
257         // Look for the initial GOT indirect load.
258         if (isGOTPLDpc(*BBI)) {
259           GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
260                                     BBI->getOperand(0).getReg(),
261                                     PPC::NoRegister, true};
262           CandPairs.push_back(CurrentPair);
263           continue;
264         }
265 
266         // We haven't encountered any new PLD instructions, nothing to check.
267         if (CandPairs.empty())
268           continue;
269 
270         // Run through the candidate pairs and see if any of the registers
271         // defined in the PLD instructions are used by this instruction.
272         // Note: the size of CandPairs can change in the loop.
273         for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
274           GOTDefUsePair &Pair = CandPairs[Idx];
275           // The instruction does not use or modify this PLD's def reg,
276           // ignore it.
277           if (!BBI->readsRegister(Pair.DefReg, TRI) &&
278               !BBI->modifiesRegister(Pair.DefReg, TRI))
279             continue;
280 
281           // The use needs to be used in the address compuation and not
282           // as the register being stored for a store.
283           const MachineOperand *UseOp =
284               hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
285 
286           // Check for a valid use.
287           if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
288               UseOp->isUse() && UseOp->isKill()) {
289             Pair.UseInst = BBI;
290             Pair.UseReg = BBI->getOperand(0).getReg();
291             ValidPairs.push_back(Pair);
292           }
293           CandPairs.erase(CandPairs.begin() + Idx);
294         }
295       }
296 
297       // Go through all of the pairs and check for any more valid uses.
298       for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
299         // We shouldn't be here if we don't have a valid pair.
300         assert(Pair->UseInst.isValid() && Pair->StillValid &&
301                "Kept an invalid def/use pair for GOT PCRel opt");
302         // We have found a potential pair. Search through the instructions
303         // between the def and the use to see if it is valid to mark this as a
304         // linker opt.
305         MachineBasicBlock::iterator BBI = Pair->DefInst;
306         ++BBI;
307         for (; BBI != Pair->UseInst; ++BBI) {
308           if (BBI->readsRegister(Pair->UseReg, TRI) ||
309               BBI->modifiesRegister(Pair->UseReg, TRI)) {
310             Pair->StillValid = false;
311             break;
312           }
313         }
314 
315         if (!Pair->StillValid)
316           continue;
317 
318         // The load/store instruction that uses the address from the PLD will
319         // either use a register (for a store) or define a register (for the
320         // load). That register will be added as an implicit def to the PLD
321         // and as an implicit use on the second memory op. This is a precaution
322         // to prevent future passes from using that register between the two
323         // instructions.
324         MachineOperand ImplDef =
325             MachineOperand::CreateReg(Pair->UseReg, true, true);
326         MachineOperand ImplUse =
327             MachineOperand::CreateReg(Pair->UseReg, false, true);
328         Pair->DefInst->addOperand(ImplDef);
329         Pair->UseInst->addOperand(ImplUse);
330 
331         // Create the symbol.
332         MCContext &Context = MF->getContext();
333         MCSymbol *Symbol =
334             Context.createTempSymbol(Twine("pcrel"), false, false);
335         MachineOperand PCRelLabel =
336             MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
337         Pair->DefInst->addOperand(*MF, PCRelLabel);
338         Pair->UseInst->addOperand(*MF, PCRelLabel);
339         MadeChange |= true;
340       }
341       return MadeChange;
342     }
343 
344     bool runOnMachineFunction(MachineFunction &MF) override {
345       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
346         // Remove UNENCODED_NOP even when this pass is disabled.
347         // This needs to be done unconditionally so we don't emit zeros
348         // in the instruction stream.
349         SmallVector<MachineInstr *, 4> InstrsToErase;
350         for (MachineBasicBlock &MBB : MF)
351           for (MachineInstr &MI : MBB)
352             if (MI.getOpcode() == PPC::UNENCODED_NOP)
353               InstrsToErase.push_back(&MI);
354         for (MachineInstr *MI : InstrsToErase)
355           MI->eraseFromParent();
356         return false;
357       }
358       bool Changed = false;
359       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
360       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
361       SmallVector<MachineInstr *, 4> InstrsToErase;
362       for (MachineBasicBlock &MBB : MF) {
363         Changed |= removeRedundantLIs(MBB, TRI);
364         Changed |= addLinkerOpt(MBB, TRI);
365         for (MachineInstr &MI : MBB) {
366           unsigned Opc = MI.getOpcode();
367           if (Opc == PPC::UNENCODED_NOP) {
368             InstrsToErase.push_back(&MI);
369             continue;
370           }
371           // Detect self copies - these can result from running AADB.
372           if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
373             const MCInstrDesc &MCID = TII->get(Opc);
374             if (MCID.getNumOperands() == 3 &&
375                 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
376                 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
377               NumberOfSelfCopies++;
378               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
379               LLVM_DEBUG(MI.dump());
380               InstrsToErase.push_back(&MI);
381               continue;
382             }
383             else if (MCID.getNumOperands() == 2 &&
384                      MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
385               NumberOfSelfCopies++;
386               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
387               LLVM_DEBUG(MI.dump());
388               InstrsToErase.push_back(&MI);
389               continue;
390             }
391           }
392           MachineInstr *DefMIToErase = nullptr;
393           if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
394             Changed = true;
395             NumRRConvertedInPreEmit++;
396             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
397             LLVM_DEBUG(MI.dump());
398             if (DefMIToErase) {
399               InstrsToErase.push_back(DefMIToErase);
400             }
401           }
402           if (TII->foldFrameOffset(MI)) {
403             Changed = true;
404             NumFrameOffFoldInPreEmit++;
405             LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
406             LLVM_DEBUG(MI.dump());
407           }
408         }
409 
410         // Eliminate conditional branch based on a constant CR bit by
411         // CRSET or CRUNSET. We eliminate the conditional branch or
412         // convert it into an unconditional branch. Also, if the CR bit
413         // is not used by other instructions, we eliminate CRSET as well.
414         auto I = MBB.getFirstInstrTerminator();
415         if (I == MBB.instr_end())
416           continue;
417         MachineInstr *Br = &*I;
418         if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
419           continue;
420         MachineInstr *CRSetMI = nullptr;
421         Register CRBit = Br->getOperand(0).getReg();
422         unsigned CRReg = getCRFromCRBit(CRBit);
423         bool SeenUse = false;
424         MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
425         for (It++; It != Er; It++) {
426           if (It->modifiesRegister(CRBit, TRI)) {
427             if ((It->getOpcode() == PPC::CRUNSET ||
428                  It->getOpcode() == PPC::CRSET) &&
429                 It->getOperand(0).getReg() == CRBit)
430               CRSetMI = &*It;
431             break;
432           }
433           if (It->readsRegister(CRBit, TRI))
434             SeenUse = true;
435         }
436         if (!CRSetMI) continue;
437 
438         unsigned CRSetOp = CRSetMI->getOpcode();
439         if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
440             (Br->getOpcode() == PPC::BC  && CRSetOp == PPC::CRUNSET)) {
441           // Remove this branch since it cannot be taken.
442           InstrsToErase.push_back(Br);
443           MBB.removeSuccessor(Br->getOperand(1).getMBB());
444         }
445         else {
446           // This conditional branch is always taken. So, remove all branches
447           // and insert an unconditional branch to the destination of this.
448           MachineBasicBlock::iterator It = Br, Er = MBB.end();
449           for (; It != Er; It++) {
450             if (It->isDebugInstr()) continue;
451             assert(It->isTerminator() && "Non-terminator after a terminator");
452             InstrsToErase.push_back(&*It);
453           }
454           if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
455             ArrayRef<MachineOperand> NoCond;
456             TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
457                               NoCond, Br->getDebugLoc());
458           }
459           for (auto &Succ : MBB.successors())
460             if (Succ != Br->getOperand(1).getMBB()) {
461               MBB.removeSuccessor(Succ);
462               break;
463             }
464         }
465 
466         // If the CRBit is not used by another instruction, we can eliminate
467         // CRSET/CRUNSET instruction.
468         if (!SeenUse) {
469           // We need to check use of the CRBit in successors.
470           for (auto &SuccMBB : MBB.successors())
471             if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
472               SeenUse = true;
473               break;
474             }
475           if (!SeenUse)
476             InstrsToErase.push_back(CRSetMI);
477         }
478       }
479       for (MachineInstr *MI : InstrsToErase) {
480         LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
481         LLVM_DEBUG(MI->dump());
482         MI->eraseFromParent();
483         NumRemovedInPreEmit++;
484       }
485       return Changed;
486     }
487   };
488 }
489 
490 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
491                 false, false)
492 char PPCPreEmitPeephole::ID = 0;
493 
494 FunctionPass *llvm::createPPCPreEmitPeepholePass() {
495   return new PPCPreEmitPeephole();
496 }
497