1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file GVNSink.cpp
10 /// This pass attempts to sink instructions into successors, reducing static
11 /// instruction count and enabling if-conversion.
12 ///
13 /// We use a variant of global value numbering to decide what can be sunk.
14 /// Consider:
15 ///
16 /// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
17 /// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
18 /// \ /
19 /// [ %e = phi i32 %a2, %c2 ]
20 /// [ add i32 %e, 4 ]
21 ///
22 ///
23 /// GVN would number %a1 and %c1 differently because they compute different
24 /// results - the VN of an instruction is a function of its opcode and the
25 /// transitive closure of its operands. This is the key property for hoisting
26 /// and CSE.
27 ///
28 /// What we want when sinking however is for a numbering that is a function of
29 /// the *uses* of an instruction, which allows us to answer the question "if I
30 /// replace %a1 with %c1, will it contribute in an equivalent way to all
31 /// successive instructions?". The PostValueTable class in GVN provides this
32 /// mapping.
33 //
34 //===----------------------------------------------------------------------===//
35
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/Hashing.h"
40 #include "llvm/ADT/None.h"
41 #include "llvm/ADT/Optional.h"
42 #include "llvm/ADT/PostOrderIterator.h"
43 #include "llvm/ADT/STLExtras.h"
44 #include "llvm/ADT/SmallPtrSet.h"
45 #include "llvm/ADT/SmallVector.h"
46 #include "llvm/ADT/Statistic.h"
47 #include "llvm/Analysis/GlobalsModRef.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/CFG.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/PassManager.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/Use.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/InitializePasses.h"
60 #include "llvm/Pass.h"
61 #include "llvm/Support/Allocator.h"
62 #include "llvm/Support/ArrayRecycler.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/raw_ostream.h"
68 #include "llvm/Transforms/Scalar.h"
69 #include "llvm/Transforms/Scalar/GVN.h"
70 #include "llvm/Transforms/Scalar/GVNExpression.h"
71 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
72 #include "llvm/Transforms/Utils/Local.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstddef>
76 #include <cstdint>
77 #include <iterator>
78 #include <utility>
79
80 using namespace llvm;
81
82 #define DEBUG_TYPE "gvn-sink"
83
84 STATISTIC(NumRemoved, "Number of instructions removed");
85
86 namespace llvm {
87 namespace GVNExpression {
88
dump() const89 LLVM_DUMP_METHOD void Expression::dump() const {
90 print(dbgs());
91 dbgs() << "\n";
92 }
93
94 } // end namespace GVNExpression
95 } // end namespace llvm
96
97 namespace {
98
isMemoryInst(const Instruction * I)99 static bool isMemoryInst(const Instruction *I) {
100 return isa<LoadInst>(I) || isa<StoreInst>(I) ||
101 (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
102 (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
103 }
104
105 /// Iterates through instructions in a set of blocks in reverse order from the
106 /// first non-terminator. For example (assume all blocks have size n):
107 /// LockstepReverseIterator I([B1, B2, B3]);
108 /// *I-- = [B1[n], B2[n], B3[n]];
109 /// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
110 /// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
111 /// ...
112 ///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order in which they
/// appear in the list returned by operator*.
class LockstepReverseIterator {
  // The full set of blocks we were constructed with; never shrinks.
  ArrayRef<BasicBlock *> Blocks;
  // Blocks that still have instructions left to visit.
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  // Current cursor: one instruction per active block, in Blocks order.
  SmallVector<Instruction *, 4> Insts;
  // Set once every block is exhausted; the iterator is then invalid.
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  /// Restart iteration: all blocks become active again and the cursor is
  /// positioned on the instruction just before each block's terminator.
  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  /// Drop from the cursor (and from ActiveBlocks) every block not present in
  /// \p Blocks. Used after the caller has narrowed the candidate block set.
  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    // Erase-while-iterating: only advance II when we keep the element.
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!llvm::is_contained(Blocks, (*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  /// Step every active block's cursor one instruction backwards. A block
  /// whose cursor is already at the front is retired from ActiveBlocks.
  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
185
186 //===----------------------------------------------------------------------===//
187
188 /// Candidate solution for sinking. There may be different ways to
189 /// sink instructions, differing in the number of instructions sunk,
190 /// the number of predecessors sunk from and the number of PHIs
191 /// required.
192 struct SinkingInstructionCandidate {
193 unsigned NumBlocks;
194 unsigned NumInstructions;
195 unsigned NumPHIs;
196 unsigned NumMemoryInsts;
197 int Cost = -1;
198 SmallVector<BasicBlock *, 4> Blocks;
199
calculateCost__anon7f9656e30111::SinkingInstructionCandidate200 void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
201 unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
202 unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
203 Cost = (NumInstructions * (NumBlocks - 1)) -
204 (NumExtraPHIs *
205 NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
206 - SplitEdgeCost;
207 }
208
operator >__anon7f9656e30111::SinkingInstructionCandidate209 bool operator>(const SinkingInstructionCandidate &Other) const {
210 return Cost > Other.Cost;
211 }
212 };
213
214 #ifndef NDEBUG
operator <<(raw_ostream & OS,const SinkingInstructionCandidate & C)215 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
216 OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
217 << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
218 return OS;
219 }
220 #endif
221
222 //===----------------------------------------------------------------------===//
223
224 /// Describes a PHI node that may or may not exist. These track the PHIs
225 /// that must be created if we sunk a sequence of instructions. It provides
226 /// a hash function for efficient equality comparisons.
class ModelledPHI {
  // Parallel arrays: Values[i] is the incoming value from Blocks[i].
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  /// Model an existing PHI node, normalizing the incoming operand order so
  /// that two PHIs with the same (block, value) pairs compare equal.
  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order, then by
    // value pointer order.
    // NOTE(review): sorting by raw pointer order is not stable from run to
    // run; confirm this cannot leak into output ordering.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    // The ID is smuggled in as a fake Value pointer; it is never dereferenced,
    // only compared/hashes as a sentinel.
    M.Values.push_back(reinterpret_cast<Value*>(ID));
    return M;
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    // Walk both parallel arrays in lockstep, erasing pairs whose block was
    // dropped; erase() returns the next valid iterator.
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!llvm::is_contained(NewBlocks, *BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  /// True if every incoming value is the same Value (vacuously true when
  /// empty is not expected; Values[0] is read unconditionally).
  bool areAllIncomingValuesSame() const {
    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
  }

  /// True if every incoming value has the same IR type as the first.
  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  /// True if any incoming value is a Constant.
  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor. Note: only Values contributes to the hash; Blocks is
  // checked by operator== (a weaker hash, but still a valid one).
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};
312
313 template <typename ModelledPHI> struct DenseMapInfo {
getEmptyKey__anon7f9656e30111::DenseMapInfo314 static inline ModelledPHI &getEmptyKey() {
315 static ModelledPHI Dummy = ModelledPHI::createDummy(0);
316 return Dummy;
317 }
318
getTombstoneKey__anon7f9656e30111::DenseMapInfo319 static inline ModelledPHI &getTombstoneKey() {
320 static ModelledPHI Dummy = ModelledPHI::createDummy(1);
321 return Dummy;
322 }
323
getHashValue__anon7f9656e30111::DenseMapInfo324 static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
325
isEqual__anon7f9656e30111::DenseMapInfo326 static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
327 return LHS == RHS;
328 }
329 };
330
331 using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
332
333 //===----------------------------------------------------------------------===//
334 // ValueTable
335 //===----------------------------------------------------------------------===//
336 // This is a value number table where the value number is a function of the
337 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
338 // that the program would be equivalent if we replaced A with PHI(A, B).
339 //===----------------------------------------------------------------------===//
340
341 /// A GVN expression describing how an instruction is used. The operands
342 /// field of BasicExpression is used to store uses, not operands.
343 ///
344 /// This class also contains fields for discriminators used when determining
345 /// equivalence of instructions with sideeffects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  // Discriminators for instructions with side effects; both feed the hash.
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  // Non-empty only for shufflevector; the mask participates in equality.
  ArrayRef<int> ShuffleMask;

public:
  /// Build an expression keyed on I's *uses* (its users), not its operands:
  /// the BasicExpression operand array is filled with I's users, sorted so
  /// that the expression is order-independent.
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      // Copy the mask into the allocator so it outlives the instruction.
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    // NOTE(review): this sorts user pointers; assumed safe because the order
    // only feeds hashing/equality, not output - verify.
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  /// Hash over the raw user pointers plus the memory discriminators.
  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  /// Like getHashValue(), but each stored user is first mapped through
  /// \p MapFn (used to hash over value numbers rather than pointers).
  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};
383
384 using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
385
class ValueTable {
  // Cache of value numbers already assigned to Values.
  DenseMap<Value *, uint32_t> ValueNumbering;
  // Expression object -> value number (populated lazily in lookupOrAdd).
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  // Structural hash -> value number; lets distinct expression objects that
  // hash identically share one number.
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  // 0 is reserved (see getMemoryUseOrder); numbering starts at 1.
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      // Fold the predicate into the opcode so that e.g. icmp eq and icmp ne
      // receive distinct expression numbers.
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    // Atomic or ordered accesses are never candidates; returning nullptr
    // makes lookupOrAdd hand out a fresh, unshared number.
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    // Non-instructions (arguments, constants, ...) each get a unique number.
    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    // ~0U is the sentinel for "unreachable - do not sink". Note this result
    // is intentionally NOT cached in ValueNumbering.
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    // Only the opcodes below may share value numbers; everything else falls
    // through with exp == nullptr and gets a unique number.
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      // Hash the expression over the value numbers of its users (recursing
      // through lookupOrAdd), so equal-use instructions collide.
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      // Reads don't define memory - skip loads and read-only calls/invokes.
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    // 0 means "no memory-defining instruction before the terminator".
    return 0;
  }
};
569
570 //===----------------------------------------------------------------------===//
571
class GVNSink {
public:
  GVNSink() = default;

  /// Run sinking over every block of \p F in reverse post-order.
  /// \returns true if any instruction was sunk (i.e. the IR changed).
  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function*> RPOT(&F);
    // RPOT visits exactly the reachable blocks, so it doubles as the
    // reachability set for the value table.
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;

  /// Instructions that must never be moved into a successor.
  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to by
  /// LRI and return a candidate solution if these instructions can be sunk, or
  /// None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      // PHIContents collects every value fed into any PHI of BB.
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    // Post-increment inside dyn_cast: the iterator is advanced before PN is
    // (potentially) erased, so erasure never invalidates the cursor.
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        // Degenerate self-referential PHI: no real value flows in.
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
642
// Analyze one lockstep "row" of instructions (one per active predecessor).
// On success, returns a candidate describing sinking everything analyzed so
// far; InstNum/MemoryInstNum/NeededPHIs/PHIContents are accumulated across
// successive calls as the caller steps the iterator backwards.
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  // Count how many instructions share each value number; the number held by
  // the most instructions is the one we try to sink.
  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return None;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // if so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  // Package up the cumulative state into a candidate; cost is filled in by
  // the caller via calculateCost().
  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}
766
// Try to sink instructions from BBEnd's predecessors into BBEnd.
// Returns the number of instructions actually sunk.
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  // Bail out entirely if any predecessor ends in something other than a
  // branch or switch (e.g. invoke/callbr) - sinking past those is unsafe.
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  // NOTE(review): this sorts the predecessors by pointer; assumed only to
  // canonicalize the working order - verify it cannot affect output ordering.
  llvm::sort(Preds);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  // Walk backwards from the terminators, producing one (cumulative)
  // candidate per analyzable row of instructions.
  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
                                                         : Candidates) dbgs()
                                                    << " " << C << "\n";);

  // Pick the top candidate, as long it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  // Sinking from only a subset of predecessors requires splitting their
  // edges into a fresh common block to sink into.
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  // Each call sinks the current last non-terminator of every block, so
  // repeating it NumInstructions times sinks the whole chosen suffix.
  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}
834
// Sink the last non-terminator instruction of each block in Blocks into
// their common successor BBEnd, merging them into a single instruction and
// creating PHIs for any operands that differ between the copies.
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // An operand only needs a PHI if the copies disagree on it.
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      // The instruction now executes on the union of paths, so metadata and
      // flags must be the intersection of all copies'.
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  // Redirect all remaining users of the duplicates to the surviving copy
  // before deleting them.
  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
887
888 ////////////////////////////////////////////////////////////////////////////////
889 // Pass machinery / boilerplate
890
891 class GVNSinkLegacyPass : public FunctionPass {
892 public:
893 static char ID;
894
GVNSinkLegacyPass()895 GVNSinkLegacyPass() : FunctionPass(ID) {
896 initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
897 }
898
runOnFunction(Function & F)899 bool runOnFunction(Function &F) override {
900 if (skipFunction(F))
901 return false;
902 GVNSink G;
903 return G.run(F);
904 }
905
getAnalysisUsage(AnalysisUsage & AU) const906 void getAnalysisUsage(AnalysisUsage &AU) const override {
907 AU.addPreserved<GlobalsAAWrapperPass>();
908 }
909 };
910
911 } // end anonymous namespace
912
run(Function & F,FunctionAnalysisManager & AM)913 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
914 GVNSink G;
915 if (!G.run(F))
916 return PreservedAnalyses::all();
917 return PreservedAnalyses::none();
918 }
919
char GVNSinkLegacyPass::ID = 0;

// Register the pass with the legacy pass manager under the "gvn-sink" name.
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

// Factory used by the legacy pass manager to create this pass.
FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
930