//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened    , "Number of indvars widened");
STATISTIC(NumReplaced   , "Number of exit values replaced");
STATISTIC(NumLFTR       , "Number of loop exit tests replaced");
STATISTIC(NumElimExt    , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV     , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));
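
// All of the above are hidden cl::opt knobs, primarily intended for testing
// and experimentation; for example (flag names as defined above):
//   opt -passes=indvars -disable-lftr ...
//   opt -passes=indvars -indvars-widen-indvars=false ...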

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
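/// For example, an APFloat holding 10000.0 converts exactly to 10000, while
/// 0.5 fails the exactness check below and is rejected.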
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment.  Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for
  // this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

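  // All safety checks passed: e.g. init=0, inc=3, exit=10 with an equality
  // predicate would have been rejected above (Leftover == 1).  Materialize
  // the equivalent integer IV.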
  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using
  // the new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed' if there
  // are multiple users with both sign- and zero extensions, in order not to
  // introduce nondeterministic behaviour based on the unspecified order of a
  // PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running other
    // SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
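/// For example, a test of the form "icmp ne %iv.next, %limit" with a
/// loop-invariant %limit and a simple counter IV is already canonical and
/// needs no rewrite.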
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
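/// For example (illustrative), a memory access whose address is derived from
/// Root and which dominates OnPathTo establishes this: accessing a
/// poison-derived address is immediate UB.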
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provable UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/ step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/ unit stride) after the backedge of loop L
/// is taken ExitCount times.
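/// Concretely, the emitted limit is Start + ExitCount (plus one more when the
/// comparison will be against the post-incremented IV).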
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
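/// For example (illustrative IR), a latch test such as
///   %cmp = icmp slt i32 %iv, %n
/// becomes
///   %exitcond = icmp ne i32 %iv.next, %limit
/// where %limit is the value produced by genLoopLimit.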
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously
  // dynamically dead (and as such may be arbitrarily poison). We remove any
  // nowrap flags that SCEV didn't infer for the post-inc addrec (even if we
  // use a pre-inc check), because the pre-inc addrec flags may be adopted from
  // the original instruction, while SCEV has to explicitly prove the post-inc
  // nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also
    // don't move instructions which might read memory, since the loop may
    // modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

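/// Set the branch BI to exit on NewCond, queueing the old condition for
/// deletion once it no longer has uses.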
static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond =
      ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

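/// Replace each loop-header PHI with its incoming value from the preheader and
/// queue the PHIs for deletion, then iteratively simplify any in-loop users
/// that become foldable as a result.  This is only valid when the loop body
/// can never be reached again via the backedge.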
static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

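/// Rewrite the exit branch of ExitingBB to test the loop-invariant condition
/// "InvariantLHS InvariantPred InvariantRHS", expanding both operands right
/// before the branch.  InvariantPred expresses the "keep looping" direction,
/// so it is inverted when the branch exits on true.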
replaceWithInvariantCond(const Loop * L,BasicBlock * ExitingBB,ICmpInst::Predicate InvariantPred,const SCEV * InvariantLHS,const SCEV * InvariantRHS,SCEVExpander & Rewriter,SmallVectorImpl<WeakTrackingVH> & DeadInsts)1345 static void replaceWithInvariantCond(
1346 const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1347 const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1348 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1349 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1350 Rewriter.setInsertPoint(BI);
1351 auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1352 auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1353 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1354 if (ExitIfTrue)
1355 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1356 IRBuilder<> Builder(BI);
1357 auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1358 BI->getCondition()->getName());
1359 replaceExitCond(BI, NewCond, DeadInsts);
1360 }
1361
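// Worked example of the idea (illustrative only): in a loop like
//   for (i = 0; i != len; i++) { if (i u< X) ... else break; }
// the `i u< X` exit has no computable exit count, but if SCEV can find a
// loop-invariant predicate equivalent to it throughout the first MaxIter
// iterations, the loop-varying compare can be replaced with that invariant
// check (or the exit folded outright when the check is known at entry).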
1362 static bool optimizeLoopExitWithUnknownExitCount(
1363 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1364 const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1365 ScalarEvolution *SE, SCEVExpander &Rewriter,
1366 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1367 ICmpInst::Predicate Pred;
1368 Value *LHS, *RHS;
1369 BasicBlock *TrueSucc, *FalseSucc;
1370 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1371 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1372 return false;
1373
1374 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1375 "Not a loop exit!");
1376
1377 // 'LHS pred RHS' should now mean that we stay in loop.
1378 if (L->contains(FalseSucc))
1379 Pred = CmpInst::getInversePredicate(Pred);
1380
1381 // If we are proving loop exit, invert the predicate.
1382 if (Inverted)
1383 Pred = CmpInst::getInversePredicate(Pred);
1384
1385 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1386 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1387 // Can we prove it to be trivially true?
1388 if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1389 foldExit(L, ExitingBB, Inverted, DeadInsts);
1390 return true;
1391 }
1392 // The logic below works for the non-inverted condition only.
1393 if (Inverted)
1394 return false;
1395
1396 auto *ARTy = LHSS->getType();
1397 auto *MaxIterTy = MaxIter->getType();
1398 // If possible, adjust types.
1399 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1400 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1401 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1402 const SCEV *MinusOne = SE->getMinusOne(ARTy);
1403 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1404 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1405 MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1406 }
1407
1408 if (SkipLastIter) {
1409 const SCEV *One = SE->getOne(MaxIter->getType());
1410 MaxIter = SE->getMinusSCEV(MaxIter, One);
1411 }
1412
1413 // Check if there is a loop-invariant predicate equivalent to our check.
1414 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1415 L, BI, MaxIter);
1416 if (!LIP)
1417 return false;
1418
1419 // Can we prove it to be trivially true?
1420 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1421 foldExit(L, ExitingBB, Inverted, DeadInsts);
1422 else
1423 replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1424 Rewriter, DeadInsts);
1425
1426 return true;
1427 }
1428
1429 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1430 // Note: This is duplicating a particular part of SimplifyIndVars'
1431 // reasoning. We need to duplicate it because, given icmp zext(small-iv), C,
1432 // IVUsers never reaches the icmp since the zext doesn't fold to an AddRec
1433 // unless it already has flags. The alternative would be to extend the
1434 // set of "interesting" IV users to include the icmp, but doing that
1435 // regresses results in practice: it queries SCEVs before the trip counts
1436 // which rely on them, leaving SCEV to cache sub-optimal answers. That
1437 // concern about caching sub-optimal results is why we only query SCEVs of
1438 // the loop invariant RHS here.
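// As a concrete example of the predicate canonicalization below (values
// invented for illustration): given
//   %wide = zext i8 %narrow to i32
//   %cmp = icmp slt i32 %wide, %bound
// once %bound is known to lie in [0, 256) -- the zero-extension of the full
// i8 range -- both operands are provably non-negative, so the signed compare
// can be rewritten as
//   %cmp = icmp ult i32 %wide, %bound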
1439 SmallVector<BasicBlock*, 16> ExitingBlocks;
1440 L->getExitingBlocks(ExitingBlocks);
1441 bool Changed = false;
1442 for (auto *ExitingBB : ExitingBlocks) {
1443 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1444 if (!BI)
1445 continue;
1446 assert(BI->isConditional() && "exit branch must be conditional");
1447
1448 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1449 if (!ICmp || !ICmp->hasOneUse())
1450 continue;
1451
1452 auto *LHS = ICmp->getOperand(0);
1453 auto *RHS = ICmp->getOperand(1);
1454 // For the range reasoning, avoid computing SCEVs in the loop to avoid
1455 // poisoning the cache with sub-optimal results. For the must-execute
1456 // case, this is a necessary precondition for correctness.
1457 if (!L->isLoopInvariant(RHS)) {
1458 if (!L->isLoopInvariant(LHS))
1459 continue;
1460 // Same logic applies for the inverse case
1461 std::swap(LHS, RHS);
1462 }
1463
1464 // Match (icmp signed-cond zext, RHS)
1465 Value *LHSOp = nullptr;
1466 if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1467 continue;
1468
1469 const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1470 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1471 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1472 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1473 FullCR = FullCR.zeroExtend(OuterBitWidth);
1474 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1475 if (FullCR.contains(RHSCR)) {
1476 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1477 // replace the signed condition with the unsigned version.
1478 ICmp->setPredicate(ICmp->getUnsignedPredicate());
1479 Changed = true;
1480 // Note: No SCEV invalidation needed. We've changed the predicate, but
1481 // have not changed exit counts, or the values produced by the compare.
1482 continue;
1483 }
1484 }
1485
1486 // Now that we've canonicalized the condition to match the extend,
1487 // see if we can rotate the extend out of the loop.
1488 for (auto *ExitingBB : ExitingBlocks) {
1489 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1490 if (!BI)
1491 continue;
1492 assert(BI->isConditional() && "exit branch must be conditional");
1493
1494 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1495 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1496 continue;
1497
1498 bool Swapped = false;
1499 auto *LHS = ICmp->getOperand(0);
1500 auto *RHS = ICmp->getOperand(1);
1501 if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1502 // Nothing to rotate
1503 continue;
1504 if (L->isLoopInvariant(LHS)) {
1505 // Same logic applies for the inverse case until we actually pick
1506 // which operand of the compare to update.
1507 Swapped = true;
1508 std::swap(LHS, RHS);
1509 }
1510 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1511
1512 // Match (icmp unsigned-cond zext, RHS)
1513 // TODO: Extend to handle corresponding sext/signed-cmp case
1514 // TODO: Extend to other invertible functions
1515 Value *LHSOp = nullptr;
1516 if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1517 continue;
1518
1519 // In general, we only rotate if we can do so without increasing the number
1520 // of instructions. The exception is when we have a zext(add-rec). The
1521 // reason for allowing this exception is that we know we need to get rid
1522 // of the zext for SCEV to be able to compute a trip count for said loops;
1523 // we consider the new trip count valuable enough to increase instruction
1524 // count by one.
1525 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1526 continue;
1527
1528 // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1529 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1530 // when zext is loop varying and RHS is loop invariant. This converts
1531 // loop varying work to loop-invariant work.
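// For instance (hypothetical IR): the loop-varying extension in
//   %wide = zext i16 %iv to i32
//   %cmp = icmp ult i32 %wide, %limit
// becomes, with a trunc materialized once in the preheader,
//   %limit.trunc = trunc i32 %limit to i16   ; in preheader
//   %cmp = icmp ult i16 %iv, %limit.trunc
// This is only sound because we prove below that %limit fits in i16
// (i.e. zext(trunc(%limit)) == %limit).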
1532 auto doRotateTransform = [&]() {
1533 assert(ICmp->isUnsigned() && "must have proven unsigned already");
1534 auto *NewRHS =
1535 CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1536 L->getLoopPreheader()->getTerminator());
1537 ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1538 ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1539 if (LHS->use_empty())
1540 DeadInsts.push_back(LHS);
1541 };
1542
1543
1544 const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1545 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1546 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1547 auto FullCR = ConstantRange::getFull(InnerBitWidth);
1548 FullCR = FullCR.zeroExtend(OuterBitWidth);
1549 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1550 if (FullCR.contains(RHSCR)) {
1551 doRotateTransform();
1552 Changed = true;
1553 // Note: we are leaving SCEV in an unfortunately imprecise state here,
1554 // as rotation tends to reveal information about trip counts not
1555 // previously visible.
1556 continue;
1557 }
1558 }
1559
1560 return Changed;
1561 }
1562
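// Illustrative summary of one fold performed below: given
//   for (i = 0; i != n; i++) { if (i u>= m) break; ... }
// if SCEV can prove at loop entry that the `i != n` exit is always taken
// strictly before `i u>= m` could trigger, the latter branch is folded to a
// constant; likewise, an exit whose count duplicates that of a dominating
// exit can never be the one taken and is folded as well.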
1563 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1564 SmallVector<BasicBlock*, 16> ExitingBlocks;
1565 L->getExitingBlocks(ExitingBlocks);
1566
1567 // Remove all exits which aren't both rewriteable and execute on every
1568 // iteration.
1569 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1570 // If our exiting block exits multiple loops, we can only rewrite the
1571 // innermost one. Otherwise, we're changing how many times the innermost
1572 // loop runs before it exits.
1573 if (LI->getLoopFor(ExitingBB) != L)
1574 return true;
1575
1576 // Can't rewrite non-branch yet.
1577 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1578 if (!BI)
1579 return true;
1580
1581 // Likewise, the loop latch must be dominated by the exiting BB.
1582 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1583 return true;
1584
1585 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1586 // If already constant, nothing to do. However, if this is an
1587 // unconditional exit, we can still replace header phis with their
1588 // preheader value.
1589 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1590 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts);
1591 return true;
1592 }
1593
1594 return false;
1595 });
1596
1597 if (ExitingBlocks.empty())
1598 return false;
1599
1600 // Get a symbolic upper bound on the loop backedge taken count.
1601 const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1602 if (isa<SCEVCouldNotCompute>(MaxExitCount))
1603 return false;
1604
1605 // Visit our exit blocks in order of dominance. Since all exits must
1606 // dominate the latch, we know there is a total dominance order
1607 // between them.
1608 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1609 // std::sort sorts in ascending order, so we want the inverse of
1610 // the normal dominance relation.
1611 if (A == B) return false;
1612 if (DT->properlyDominates(A, B))
1613 return true;
1614 else {
1615 assert(DT->properlyDominates(B, A) &&
1616 "expected total dominance order!");
1617 return false;
1618 }
1619 });
1620 #ifndef NDEBUG
1621 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1622 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1623 }
1624 #endif
1625
1626 bool Changed = false;
1627 bool SkipLastIter = false;
1628 SmallSet<const SCEV*, 8> DominatingExitCounts;
1629 for (BasicBlock *ExitingBB : ExitingBlocks) {
1630 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1631 if (isa<SCEVCouldNotCompute>(ExitCount)) {
1632 // Okay, we do not know the exit count here. Can we at least prove that it
1633 // will remain the same within iteration space?
1634 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1635 auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1636 return optimizeLoopExitWithUnknownExitCount(
1637 L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1638 Rewriter, DeadInsts);
1639 };
1640
1641 // TODO: We might have proved that we can skip the last iteration for
1642 // this check. In this case, we only want to check the condition on the
1643 // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1644 // corner case:
1645 //
1646 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1647 //
1648 // If we could not prove that len != 0, then we also could not prove that
1649 // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1650 // OptimizeCond will likely not prove anything for it, even if it could
1651 // prove the same fact for len.
1652 //
1653 // As a temporary solution, we query both last and pre-last iterations in
1654 // hope that we will be able to prove triviality for at least one of
1655 // them. We can stop querying MaxExitCount for this case once SCEV
1656 // understands that (MaxExitCount - 1) will not overflow here.
1657 if (OptimizeCond(false, false) || OptimizeCond(true, false))
1658 Changed = true;
1659 else if (SkipLastIter)
1660 if (OptimizeCond(false, true) || OptimizeCond(true, true))
1661 Changed = true;
1662 continue;
1663 }
1664
1665 if (MaxExitCount == ExitCount)
1666 // If the loop has more than 1 iteration, all further checks will be
1667 // executed one iteration fewer.
1668 SkipLastIter = true;
1669
1670 // If we know we'd exit on the first iteration, rewrite the exit to
1671 // reflect this. This does not imply the loop must exit through this
1672 // exit; there may be an earlier one taken on the first iteration.
1673 // We know that the backedge can't be taken, so we replace all
1674 // the header PHIs with values coming from the preheader.
1675 if (ExitCount->isZero()) {
1676 foldExit(L, ExitingBB, true, DeadInsts);
1677 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts);
1678 Changed = true;
1679 continue;
1680 }
1681
1682 assert(ExitCount->getType()->isIntegerTy() &&
1683 MaxExitCount->getType()->isIntegerTy() &&
1684 "Exit counts must be integers");
1685
1686 Type *WiderType =
1687 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1688 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1689 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1690 assert(MaxExitCount->getType() == ExitCount->getType());
1691
1692 // Can we prove that some other exit must be taken strictly before this
1693 // one?
1694 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1695 MaxExitCount, ExitCount)) {
1696 foldExit(L, ExitingBB, false, DeadInsts);
1697 Changed = true;
1698 continue;
1699 }
1700
1701 // As we run, keep track of which exit counts we've encountered. If we
1702 // find a duplicate, we've found an exit which would have exited on the
1703 // exiting iteration, but (from the visit order) strictly follows another
1704 // which does the same and is thus dead.
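// For example, if exits E1 and E2 would both exit after the same number
// of iterations and E1 dominates E2, then E1 is reached first on that
// iteration and E2's test can never fire; we fold E2 away.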
1705 if (!DominatingExitCounts.insert(ExitCount).second) {
1706 foldExit(L, ExitingBB, false, DeadInsts);
1707 Changed = true;
1708 continue;
1709 }
1710
1711 // TODO: There might be another opportunity to leverage SCEV's reasoning
1712 // here. If we kept track of the min of dominating exits so far, we could
1713 // discharge exits with EC >= MDEC. This is less powerful than the existing
1714 // transform (since later exits aren't considered), but potentially more
1715 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1716 // nor a >u b. Such a case is not currently known.
1717 }
1718 return Changed;
1719 }
1720
1721 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1722 SmallVector<BasicBlock*, 16> ExitingBlocks;
1723 L->getExitingBlocks(ExitingBlocks);
1724
1725 // Finally, see if we can rewrite our exit conditions into a loop invariant
1726 // form. If we have a read-only loop, and we can tell that we must exit down
1727 // a path which does not need any of the values computed within the loop, we
1728 // can rewrite the loop to exit on the first iteration. Note that this
1729 // doesn't a) tell us that the loop exits on the first iteration (unless
1730 // *all* exits are predicatable) or b) tell us *which* exit might be taken.
1731 // This transformation looks a lot like dead loop elimination, but
1732 // restricted to read-only loops and without necessarily needing to
1733 // kill the loop entirely.
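// Roughly, for each predicatable exit the loop-varying test is replaced by
// the loop-invariant test `exit-count-of-this-exit == exact-BTC`, so the
// loop exits on its first iteration down whichever exit would eventually
// have been taken anyway; this is only legal because the loop is read-only.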
1734 if (!LoopPredication)
1735 return false;
1736
1737 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1738 // through *explicit* control flow. We have to eliminate the possibility of
1739 // implicit exits (see below) before we know it's truly exact.
1740 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1741 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1742 return false;
1743
1744 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1745 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1746
1747 auto BadExit = [&](BasicBlock *ExitingBB) {
1748 // If our exiting block exits multiple loops, we can only rewrite the
1749 // innermost one. Otherwise, we're changing how many times the innermost
1750 // loop runs before it exits.
1751 if (LI->getLoopFor(ExitingBB) != L)
1752 return true;
1753
1754 // Can't rewrite non-branch yet.
1755 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1756 if (!BI)
1757 return true;
1758
1759 // If already constant, nothing to do.
1760 if (isa<Constant>(BI->getCondition()))
1761 return true;
1762
1763 // If the exit block has phis, we need to be able to compute the values
1764 // within the loop which contains them. This assumes trivial LCSSA phis
1765 // have already been removed; TODO: generalize.
1766 BasicBlock *ExitBlock =
1767 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1768 if (!ExitBlock->phis().empty())
1769 return true;
1770
1771 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1772 if (isa<SCEVCouldNotCompute>(ExitCount) ||
1773 !Rewriter.isSafeToExpand(ExitCount))
1774 return true;
1775
1776 assert(SE->isLoopInvariant(ExitCount, L) &&
1777 "Exit count must be loop invariant");
1778 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1779 return false;
1780 };
1781
1782 // If we have any exits which can't be predicated themselves, then we can't
1783 // predicate any exit which isn't guaranteed to execute before it. Consider
1784 // two exits (a) and (b) which would both exit on the same iteration. If we
1785 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1786 // we could convert a loop from exiting through (a) to one exiting through
1787 // (b). Note that this problem exists only for exits with the same exit
1788 // count, and we could be more aggressive when exit counts are known unequal.
1789 llvm::sort(ExitingBlocks,
1790 [&](BasicBlock *A, BasicBlock *B) {
1791 // std::sort sorts in ascending order, so we want the inverse of
1792 // the normal dominance relation, plus a tie breaker for blocks
1793 // unordered by dominance.
1794 if (DT->properlyDominates(A, B)) return true;
1795 if (DT->properlyDominates(B, A)) return false;
1796 return A->getName() < B->getName();
1797 });
1798 // Check to see if our exit blocks are a total order (i.e. a linear chain of
1799 // exits before the backedge). If they aren't, reasoning about reachability
1800 // is complicated and we choose not to for now.
1801 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1802 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1803 return false;
1804
1805 // Given our sorted total order, we know that exit[j] must be evaluated
1806 // after all exit[i] such that j > i.
1807 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1808 if (BadExit(ExitingBlocks[i])) {
1809 ExitingBlocks.resize(i);
1810 break;
1811 }
1812
1813 if (ExitingBlocks.empty())
1814 return false;
1815
1816 // We rely on not being able to reach an exiting block on a later iteration
1817 // than its statically computed exit count. The implementation of
1818 // getExitCount currently has this invariant, but assert it here so that
1819 // breakage is obvious if this ever changes.
1820 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1821 return DT->dominates(ExitingBB, L->getLoopLatch());
1822 }));
1823
1824 // At this point, ExitingBlocks consists of only those blocks which are
1825 // predicatable. Given that, we know we have at least one exit we can
1826 // predicate if the loop doesn't have side effects and doesn't have any
1827 // implicit exits (because then our exact BTC isn't actually exact).
1828 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
1829 // suggestions on how to improve this? I can obviously bail out for outer
1830 // loops, but that seems less than ideal. MemorySSA can find memory writes,
1831 // is that enough for *all* side effects?
1832 for (BasicBlock *BB : L->blocks())
1833 for (auto &I : *BB)
1834 // TODO: isGuaranteedToTransfer
1835 if (I.mayHaveSideEffects())
1836 return false;
1837
1838 bool Changed = false;
1839 // Finally, do the actual predication for all predicatable blocks. A couple
1840 // of notes here:
1841 // 1) We don't bother to constant fold dominated exits with identical exit
1842 // counts; that's simply a form of CSE/equality propagation and we leave
1843 // it for dedicated passes.
1844 // 2) We insert the comparison at the branch. Hoisting introduces additional
1845 // legality constraints and we leave that to dedicated logic. We want to
1846 // predicate even if we can't insert a loop invariant expression as
1847 // peeling or unrolling will likely reduce the cost of the otherwise loop
1848 // varying check.
1849 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1850 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1851 Value *ExactBTCV = nullptr; // Lazily generated if needed.
1852 for (BasicBlock *ExitingBB : ExitingBlocks) {
1853 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1854
1855 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1856 Value *NewCond;
1857 if (ExitCount == ExactBTC) {
1858 NewCond = L->contains(BI->getSuccessor(0)) ?
1859 B.getFalse() : B.getTrue();
1860 } else {
1861 Value *ECV = Rewriter.expandCodeFor(ExitCount);
1862 if (!ExactBTCV)
1863 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1864 Value *RHS = ExactBTCV;
1865 if (ECV->getType() != RHS->getType()) {
1866 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1867 ECV = B.CreateZExt(ECV, WiderTy);
1868 RHS = B.CreateZExt(RHS, WiderTy);
1869 }
1870 auto Pred = L->contains(BI->getSuccessor(0)) ?
1871 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1872 NewCond = B.CreateICmp(Pred, ECV, RHS);
1873 }
1874 Value *OldCond = BI->getCondition();
1875 BI->setCondition(NewCond);
1876 if (OldCond->use_empty())
1877 DeadInsts.emplace_back(OldCond);
1878 Changed = true;
1879 }
1880
1881 return Changed;
1882 }
1883
1884 //===----------------------------------------------------------------------===//
1885 // IndVarSimplify driver. Manage several subpasses of IV simplification.
1886 //===----------------------------------------------------------------------===//
1887
1888 bool IndVarSimplify::run(Loop *L) {
1889 // We need (and expect!) the incoming loop to be in LCSSA.
1890 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1891 "LCSSA required to run indvars!");
1892
1893 // If LoopSimplify form is not available, stay out of trouble. Some notes:
1894 // - LSR currently only supports LoopSimplify-form loops. Indvars'
1895 // canonicalization can be a pessimization without LSR to "clean up"
1896 // afterwards.
1897 // - We depend on having a preheader; in particular,
1898 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
1899 // and we're in trouble if we can't find the induction variable even when
1900 // we've manually inserted one.
1901 // - LFTR relies on having a single backedge.
1902 if (!L->isLoopSimplifyForm())
1903 return false;
1904
1905 #ifndef NDEBUG
1906 // Used below for a consistency check only
1907 // Note: Since the result returned by ScalarEvolution may depend on the order
1908 // in which previous results are added to its cache, the call to
1909 // getBackedgeTakenCount() may change following SCEV queries.
1910 const SCEV *BackedgeTakenCount;
1911 if (VerifyIndvars)
1912 BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1913 #endif
1914
1915 bool Changed = false;
1916 // If there are any floating-point recurrences, attempt to
1917 // transform them to use integer recurrences.
1918 Changed |= rewriteNonIntegerIVs(L);
1919
1920 // Create a rewriter object which we'll use to transform the code with.
1921 SCEVExpander Rewriter(*SE, DL, "indvars");
1922 #ifndef NDEBUG
1923 Rewriter.setDebugType(DEBUG_TYPE);
1924 #endif
1925
1926 // Eliminate redundant IV users.
1927 //
1928 // Simplification works best when run before other consumers of SCEV. We
1929 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1930 // other expressions involving loop IVs have been evaluated. This helps SCEV
1931 // set no-wrap flags before normalizing sign/zero extension.
1932 Rewriter.disableCanonicalMode();
1933 Changed |= simplifyAndExtend(L, Rewriter, LI);
1934
1935 // Check to see if we can compute the final value of any expressions
1936 // that are recurrent in the loop, and substitute the exit values from the
1937 // loop into any instructions outside of the loop that use the final values
1938 // of the current expressions.
1939 if (ReplaceExitValue != NeverRepl) {
1940 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1941 ReplaceExitValue, DeadInsts)) {
1942 NumReplaced += Rewrites;
1943 Changed = true;
1944 }
1945 }
1946
1947 // Eliminate redundant IV cycles.
1948 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1949
1950 // Try to convert exit conditions to unsigned and rotate computation
1951 // out of the loop. Note: Handles invalidation internally if needed.
1952 Changed |= canonicalizeExitCondition(L);
1953
1954 // Try to eliminate loop exits based on analyzable exit counts.
1955 if (optimizeLoopExits(L, Rewriter)) {
1956 Changed = true;
1957 // Given we've changed exit counts, notify SCEV
1958 // Some nested loops may share the same folded exit basic block,
1959 // so we need to notify the topmost loop.
1960 SE->forgetTopmostLoop(L);
1961 }
1962
1963 // Try to form loop invariant tests for loop exits by changing how many
1964 // iterations of the loop run when that is unobservable.
1965 if (predicateLoopExits(L, Rewriter)) {
1966 Changed = true;
1967 // Given we've changed exit counts, notify SCEV
1968 SE->forgetLoop(L);
1969 }
1970
1971 // If we have a trip count expression, rewrite the loop's exit condition
1972 // using it.
1973 if (!DisableLFTR) {
1974 BasicBlock *PreHeader = L->getLoopPreheader();
1975
1976 SmallVector<BasicBlock*, 16> ExitingBlocks;
1977 L->getExitingBlocks(ExitingBlocks);
1978 for (BasicBlock *ExitingBB : ExitingBlocks) {
1979 // Can't rewrite non-branch yet.
1980 if (!isa<BranchInst>(ExitingBB->getTerminator()))
1981 continue;
1982
1983 // If our exiting block exits multiple loops, we can only rewrite the
1984 // innermost one. Otherwise, we're changing how many times the innermost
1985 // loop runs before it exits.
1986 if (LI->getLoopFor(ExitingBB) != L)
1987 continue;
1988
1989 if (!needsLFTR(L, ExitingBB))
1990 continue;
1991
1992 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1993 if (isa<SCEVCouldNotCompute>(ExitCount))
1994 continue;
1995
1996 // This was handled above, but as we form SCEVs, we can sometimes refine
1997 // existing ones; this allows exit counts to be folded to zero which
1998 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
1999 // until stable to handle cases like this better.
2000 if (ExitCount->isZero())
2001 continue;
2002
2003 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2004 if (!IndVar)
2005 continue;
2006
2007 // Avoid high cost expansions. Note: This heuristic is questionable in
2008 // that our definition of "high cost" is not exactly principled.
2009 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2010 TTI, PreHeader->getTerminator()))
2011 continue;
2012
2013 // Check preconditions for proper SCEVExpander operation. SCEV does not
2014 // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2015 // any pass that uses the SCEVExpander must do it. This does not work
2016 // well for loop passes because SCEVExpander makes assumptions about
2017 // all loops, while LoopPassManager only forces the current loop to be
2018 // simplified.
2019 //
2020 // FIXME: SCEV expansion has no way to bail out, so the caller must
2021 // explicitly check any assumptions made by SCEV. Brittle.
2022 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2023 if (!AR || AR->getLoop()->getLoopPreheader())
2024 Changed |= linearFunctionTestReplace(L, ExitingBB,
2025 ExitCount, IndVar,
2026 Rewriter);
2027 }
2028 }
2029 // Clear the rewriter cache, because values that are in the rewriter's cache
2030 // can be deleted in the loop below, causing the AssertingVH in the cache to
2031 // trigger.
2032 Rewriter.clear();
2033
2034 // Now that we're done iterating through lists, clean up any instructions
2035 // which are now dead.
2036 while (!DeadInsts.empty()) {
2037 Value *V = DeadInsts.pop_back_val();
2038
2039 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2040 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2041 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2042 Changed |=
2043 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2044 }
2045
2046 // The Rewriter may not be used from this point on.
2047
2048 // Loop-invariant instructions in the preheader that aren't used in the
2049 // loop may be sunk below the loop to reduce register pressure.
2050 Changed |= sinkUnusedInvariants(L);
2051
2052 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2053 // trip count and therefore can further simplify exit values in addition to
2054 // rewriteLoopExitValues.
2055 Changed |= rewriteFirstIterationLoopExitValues(L);
2056
2057 // Clean up dead instructions.
2058 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2059
2060 // Check a post-condition.
2061 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2062 "Indvars did not preserve LCSSA!");
2063
2064 // Verify that LFTR and any other changes have not interfered with SCEV's
2065 // ability to compute trip count. We may have *changed* the exit count, but
2066 // only by reducing it.
2067 #ifndef NDEBUG
2068 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2069 SE->forgetLoop(L);
2070 const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2071 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2072 SE->getTypeSizeInBits(NewBECount->getType()))
2073 NewBECount = SE->getTruncateOrNoop(NewBECount,
2074 BackedgeTakenCount->getType());
2075 else
2076 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2077 NewBECount->getType());
2078 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2079 NewBECount) && "indvars must preserve SCEV");
2080 }
2081 if (VerifyMemorySSA && MSSAU)
2082 MSSAU->getMemorySSA()->verifyMemorySSA();
2083 #endif
2084
2085 return Changed;
2086 }
2087
2088 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2089 LoopStandardAnalysisResults &AR,
2090 LPMUpdater &) {
2091 Function *F = L.getHeader()->getParent();
2092 const DataLayout &DL = F->getParent()->getDataLayout();
2093
2094 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2095 WidenIndVars && AllowIVWidening);
2096 if (!IVS.run(&L))
2097 return PreservedAnalyses::all();
2098
2099 auto PA = getLoopPassPreservedAnalyses();
2100 PA.preserveSet<CFGAnalyses>();
2101 if (AR.MSSA)
2102 PA.preserve<MemorySSAAnalysis>();
2103 return PA;
2104 }
2105
2106 namespace {
2107
2108 struct IndVarSimplifyLegacyPass : public LoopPass {
2109 static char ID; // Pass identification, replacement for typeid
2110
2111 IndVarSimplifyLegacyPass() : LoopPass(ID) {
2112 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2113 }
2114
2115 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2116 if (skipLoop(L))
2117 return false;
2118
2119 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2120 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2121 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2122 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2123 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2124 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2125 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2126 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2127 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2128 MemorySSA *MSSA = nullptr;
2129 if (MSSAAnalysis)
2130 MSSA = &MSSAAnalysis->getMSSA();
2131
2132 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2133 return IVS.run(L);
2134 }
2135
2136 void getAnalysisUsage(AnalysisUsage &AU) const override {
2137 AU.setPreservesCFG();
2138 AU.addPreserved<MemorySSAWrapperPass>();
2139 getLoopAnalysisUsage(AU);
2140 }
2141 };
2142
2143 } // end anonymous namespace
2144
2145 char IndVarSimplifyLegacyPass::ID = 0;
2146
2147 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2148 "Induction Variable Simplification", false, false)
2149 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2150 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2151 "Induction Variable Simplification", false, false)
2152
2153 Pass *llvm::createIndVarSimplifyPass() {
2154 return new IndVarSimplifyLegacyPass();
2155 }
2156