1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRPseudo/DecreaseCTRPseudo
11 // 2: MTCTR8Pseudo/DecreaseCTR8Pseudo
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRPseudo/MTCTR8Pseudo will be converted to "mtctr"
15 // 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "bdnz/bdz" and
16 // its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRPseudo/MTCTR8Pseudo can be deleted.
20 // 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "addi -1" and
21 // a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRPseudo if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/PassRegistry.h"
45 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include <cassert>
49
50 using namespace llvm;
51
52 #define DEBUG_TYPE "ppc-ctrloops"
53
54 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56
57 namespace {
58 class PPCCTRLoops : public MachineFunctionPass {
59 public:
60 static char ID;
61
PPCCTRLoops()62 PPCCTRLoops() : MachineFunctionPass(ID) {
63 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64 }
65
getAnalysisUsage(AnalysisUsage & AU) const66 void getAnalysisUsage(AnalysisUsage &AU) const override {
67 AU.addRequired<MachineLoopInfo>();
68 MachineFunctionPass::getAnalysisUsage(AU);
69 }
70
71 bool runOnMachineFunction(MachineFunction &MF) override;
72
73 private:
74 const PPCInstrInfo *TII = nullptr;
75 MachineRegisterInfo *MRI = nullptr;
76
77 bool processLoop(MachineLoop *ML);
78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80 MachineInstr *Dec);
81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82 };
83 } // namespace
84
85 char PPCCTRLoops::ID = 0;
86
87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88 false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91 false, false)
92
93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94
runOnMachineFunction(MachineFunction & MF)95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96 bool Changed = false;
97
98 auto &MLI = getAnalysis<MachineLoopInfo>();
99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100 MRI = &MF.getRegInfo();
101
102 for (auto ML : MLI) {
103 if (ML->isOutermost())
104 Changed |= processLoop(ML);
105 }
106
107 return Changed;
108 }
109
isCTRClobber(MachineInstr * MI,bool CheckReads) const110 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
111 if (!CheckReads) {
112 // If we are only checking for defs, that is we are going to find
113 // definitions before MTCTRloop, for this case:
114 // CTR defination inside the callee of a call instruction will not impact
115 // the defination of MTCTRloop, so we can use definesRegister() for the
116 // check, no need to check the regmask.
117 return (MI->definesRegister(PPC::CTR) &&
118 !MI->registerDefIsDead(PPC::CTR)) ||
119 (MI->definesRegister(PPC::CTR8) &&
120 !MI->registerDefIsDead(PPC::CTR8));
121 }
122
123 if ((MI->modifiesRegister(PPC::CTR) && !MI->registerDefIsDead(PPC::CTR)) ||
124 (MI->modifiesRegister(PPC::CTR8) && !MI->registerDefIsDead(PPC::CTR8)))
125 return true;
126
127 if (MI->getDesc().isCall())
128 return true;
129
130 // We define the CTR in the loop preheader, so if there is any CTR reader in
131 // the loop, we also can not use CTR loop form.
132 if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
133 return true;
134
135 return false;
136 }
137
processLoop(MachineLoop * ML)138 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
139 bool Changed = false;
140
141 // Align with HardwareLoop pass, process inner loops first.
142 for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
143 Changed |= processLoop(*I);
144
145 // If any inner loop is changed, outter loop must be without hardware loop
146 // intrinsics.
147 if (Changed)
148 return true;
149
150 auto IsLoopStart = [](MachineInstr &MI) {
151 return MI.getOpcode() == PPC::MTCTRPseudo ||
152 MI.getOpcode() == PPC::MTCTR8Pseudo;
153 };
154
155 auto SearchForStart =
156 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
157 for (auto &MI : *MBB) {
158 if (IsLoopStart(MI))
159 return &MI;
160 }
161 return nullptr;
162 };
163
164 MachineInstr *Start = nullptr;
165 MachineInstr *Dec = nullptr;
166 bool InvalidCTRLoop = false;
167
168 MachineBasicBlock *Preheader = ML->getLoopPreheader();
169 // If there is no preheader for this loop, there must be no MTCTRPseudo
170 // either.
171 if (!Preheader)
172 return false;
173
174 Start = SearchForStart(Preheader);
175 // This is not a CTR loop candidate.
176 if (!Start)
177 return false;
178
179 // If CTR is live to the preheader, we can not redefine the CTR register.
180 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
181 InvalidCTRLoop = true;
182
183 // Make sure there is also no CTR clobber in the block preheader between the
184 // begin and MTCTR.
185 for (MachineBasicBlock::reverse_instr_iterator I =
186 std::next(Start->getReverseIterator());
187 I != Preheader->instr_rend(); ++I)
188 // Only check the definitions of CTR. If there is non-dead definition for
189 // the CTR, we conservatively don't generate a CTR loop.
190 if (isCTRClobber(&*I, /* CheckReads */ false)) {
191 InvalidCTRLoop = true;
192 break;
193 }
194
195 // Make sure there is also no CTR clobber/user in the block preheader between
196 // MTCTR and the end.
197 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
198 I != Preheader->instr_end(); ++I)
199 if (isCTRClobber(&*I, /* CheckReads */ true)) {
200 InvalidCTRLoop = true;
201 break;
202 }
203
204 // Find the CTR loop components and decide whether or not to fall back to a
205 // normal loop.
206 for (auto *MBB : reverse(ML->getBlocks())) {
207 for (auto &MI : *MBB) {
208 if (MI.getOpcode() == PPC::DecreaseCTRPseudo ||
209 MI.getOpcode() == PPC::DecreaseCTR8Pseudo)
210 Dec = &MI;
211 else if (!InvalidCTRLoop)
212 // If any instruction clobber CTR, then we can not generate a CTR loop.
213 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
214 }
215 if (Dec && InvalidCTRLoop)
216 break;
217 }
218
219 assert(Dec && "CTR loop is not complete!");
220
221 if (InvalidCTRLoop) {
222 expandNormalLoops(ML, Start, Dec);
223 ++NumNormalLoops;
224 }
225 else {
226 expandCTRLoops(ML, Start, Dec);
227 ++NumCTRLoops;
228 }
229 return true;
230 }
231
expandNormalLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)232 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
233 MachineInstr *Dec) {
234 bool Is64Bit =
235 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
236
237 MachineBasicBlock *Preheader = Start->getParent();
238 MachineBasicBlock *Exiting = Dec->getParent();
239 assert((Preheader && Exiting) &&
240 "Preheader and exiting should exist for CTR loop!");
241
242 assert(Dec->getOperand(1).getImm() == 1 &&
243 "Loop decrement stride must be 1");
244
245 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
246 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
247
248 Register PHIDef =
249 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
250 : &PPC::GPRC_and_GPRC_NOR0RegClass);
251
252 Start->getParent()->getParent()->getProperties().reset(
253 MachineFunctionProperties::Property::NoPHIs);
254
255 // Generate "PHI" in the header block.
256 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
257 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
258 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
259
260 Register ADDIDef =
261 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
262 : &PPC::GPRC_and_GPRC_NOR0RegClass);
263 // Generate "addi -1" in the exiting block.
264 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
265 .addReg(PHIDef)
266 .addImm(-1);
267
268 // Add other inputs for the PHI node.
269 if (ML->isLoopLatch(Exiting)) {
270 // There must be only two predecessors for the loop header, one is the
271 // Preheader and the other one is loop latch Exiting. In hardware loop
272 // insertion pass, the block containing DecreaseCTRloop must dominate all
273 // loop latches. So there must be only one latch.
274 assert(ML->getHeader()->pred_size() == 2 &&
275 "Loop header predecessor is not right!");
276 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
277 } else {
278 // If the block containing DecreaseCTRloop is not a loop latch, we can use
279 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
280 // insertion pass, the block containing DecreaseCTRloop must dominate all
281 // loop latches.
282 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
283 if (ML->contains(P)) {
284 assert(ML->isLoopLatch(P) &&
285 "Loop's header in-loop predecessor is not loop latch!");
286 PHIMIB.addReg(ADDIDef).addMBB(P);
287 } else
288 assert(P == Preheader &&
289 "CTR loop should not be generated for irreducible loop!");
290 }
291 }
292
293 // Generate the compare in the exiting block.
294 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
295 auto CMPMIB =
296 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
297 .addReg(ADDIDef)
298 .addImm(0);
299
300 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
301 Dec->getOperand(0).getReg())
302 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
303
304 // Remove the pseudo instructions.
305 Start->eraseFromParent();
306 Dec->eraseFromParent();
307 }
308
expandCTRLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)309 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
310 MachineInstr *Dec) {
311 bool Is64Bit =
312 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
313
314 MachineBasicBlock *Preheader = Start->getParent();
315 MachineBasicBlock *Exiting = Dec->getParent();
316 assert((Preheader && Exiting) &&
317 "Preheader and exiting should exist for CTR loop!");
318
319 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
320
321 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
322 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
323 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
324 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
325 "There should be only one user for loop decrement pseudo!");
326
327 unsigned Opcode = 0;
328 switch (BrInstr->getOpcode()) {
329 case PPC::BC:
330 Opcode = BDNZOpcode;
331 (void) ML;
332 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
333 "Invalid ctr loop!");
334 break;
335 case PPC::BCn:
336 Opcode = BDZOpcode;
337 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
338 "Invalid ctr loop!");
339 break;
340 default:
341 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
342 }
343
344 unsigned MTCTROpcode = Is64Bit ? PPC::MTCTR8 : PPC::MTCTR;
345
346 // Generate "mtctr" in the loop preheader.
347 BuildMI(*Preheader, Start, Start->getDebugLoc(), TII->get(MTCTROpcode))
348 .addReg(Start->getOperand(0).getReg());
349
350 // Generate "bdnz/bdz" in the exiting block just before the terminator.
351 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
352 .addMBB(BrInstr->getOperand(1).getMBB());
353
354 // Remove the pseudo instructions.
355 Start->eraseFromParent();
356 BrInstr->eraseFromParent();
357 Dec->eraseFromParent();
358 }
359