1 //===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies expensive constants to hoist and coalesces them to
11 // better prepare it for SelectionDAG-based code generation. This works around
12 // the limitations of the basic-block-at-a-time approach.
13 //
// First it scans all instructions for integer constants and calculates their
// cost. If the constant can be folded into the instruction (the cost is
// TCC_Free) or the cost is just a simple operation (TCC_Basic), then we don't
// consider it expensive and leave it alone. This is the default behavior and
// the default implementation of getIntImmCost will always return TCC_Free.
//
// If the cost is more than TCC_Basic, then the integer constant can't be folded
// into the instruction and it might be beneficial to hoist the constant.
22 // Similar constants are coalesced to reduce register pressure and
23 // materialization code.
24 //
25 // When a constant is hoisted, it is also hidden behind a bitcast to force it to
26 // be live-out of the basic block. Otherwise the constant would be just
27 // duplicated and each basic block would have its own copy in the SelectionDAG.
28 // The SelectionDAG recognizes such constants as opaque and doesn't perform
29 // certain transformations on them, which would create a new expensive constant.
30 //
31 // This optimization is only applied to integer constants in instructions and
32 // simple (this means not nested) constant cast expressions. For example:
33 // %0 = load i64* inttoptr (i64 big_constant to i64*)
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/Transforms/Scalar/ConstantHoisting.h"
37 #include "llvm/ADT/APInt.h"
38 #include "llvm/ADT/DenseMap.h"
39 #include "llvm/ADT/None.h"
40 #include "llvm/ADT/Optional.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/Statistic.h"
44 #include "llvm/Analysis/BlockFrequencyInfo.h"
45 #include "llvm/Analysis/TargetTransformInfo.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/Dominators.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/Pass.h"
56 #include "llvm/Support/BlockFrequency.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include "llvm/Transforms/Scalar.h"
62 #include "llvm/Transforms/Utils/Local.h"
63 #include <algorithm>
64 #include <cassert>
65 #include <cstdint>
66 #include <iterator>
67 #include <tuple>
68 #include <utility>
69 
using namespace llvm;
using namespace consthoist;

// Debug category of this file (presumably consumed by the DEBUG() and
// STATISTIC() macros used below, per the usual LLVM convention).
#define DEBUG_TYPE "consthoist"

// Pass statistics. Both counters are updated in
// ConstantHoistingPass::emitBaseConstants().
STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");

// Hidden command-line flag, enabled by default. When it is set, the legacy
// pass requires BlockFrequencyInfo and both pass-manager entry points hand a
// BlockFrequencyInfo pointer to runImpl(); when it is clear they pass nullptr
// instead.
static cl::opt<bool> ConstHoistWithBlockFrequency(
    "consthoist-with-block-frequency", cl::init(true), cl::Hidden,
    cl::desc("Enable the use of the block frequency analysis to reduce the "
             "chance to execute const materialization more frequently than "
             "without hoisting."));
83 
84 namespace {
85 
/// \brief The constant hoisting pass.
///
/// Legacy pass-manager wrapper: a FunctionPass that owns a
/// ConstantHoistingPass instance (Impl) and forwards the actual work to it.
class ConstantHoistingLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  /// Construct the pass and make sure it is registered with the global
  /// PassRegistry.
  ConstantHoistingLegacyPass() : FunctionPass(ID) {
    initializeConstantHoistingLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  /// Run constant hoisting on \p Fn. \return true if the function was changed.
  bool runOnFunction(Function &Fn) override;

  /// Human-readable name of the pass.
  StringRef getPassName() const override { return "Constant Hoisting"; }

  /// Declare the analyses this pass uses. The CFG is preserved; the dominator
  /// tree and target transform info are always required, block frequency info
  /// only when ConstHoistWithBlockFrequency is set.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (ConstHoistWithBlockFrequency)
      AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  /// Forward the release request to the wrapped implementation.
  void releaseMemory() override { Impl.releaseMemory(); }

private:
  /// The pass-manager independent implementation that does the real work.
  ConstantHoistingPass Impl;
};
112 
113 } // end anonymous namespace
114 
// Definition of the pass ID declared in ConstantHoistingLegacyPass.
char ConstantHoistingLegacyPass::ID = 0;

// Register the legacy pass under the name "consthoist" together with the
// analyses it depends on. Note that BlockFrequencyInfoWrapperPass is listed
// here unconditionally, whereas getAnalysisUsage() only requires it when
// ConstHoistWithBlockFrequency is set.
INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
                      "Constant Hoisting", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
                    "Constant Hoisting", false, false)
124 
/// \brief Create a new, heap-allocated instance of the legacy constant
/// hoisting pass.
FunctionPass *llvm::createConstantHoistingPass() {
  return new ConstantHoistingLegacyPass();
}
128 
129 /// \brief Perform the constant hoisting optimization for the given function.
130 bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
131   if (skipFunction(Fn))
132     return false;
133 
134   DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
135   DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
136 
137   bool MadeChange =
138       Impl.runImpl(Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
139                    getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
140                    ConstHoistWithBlockFrequency
141                        ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
142                        : nullptr,
143                    Fn.getEntryBlock());
144 
145   if (MadeChange) {
146     DEBUG(dbgs() << "********** Function after Constant Hoisting: "
147                  << Fn.getName() << '\n');
148     DEBUG(dbgs() << Fn);
149   }
150   DEBUG(dbgs() << "********** End Constant Hoisting **********\n");
151 
152   return MadeChange;
153 }
154 
155 /// \brief Find the constant materialization insertion point.
156 Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
157                                                    unsigned Idx) const {
158   // If the operand is a cast instruction, then we have to materialize the
159   // constant before the cast instruction.
160   if (Idx != ~0U) {
161     Value *Opnd = Inst->getOperand(Idx);
162     if (auto CastInst = dyn_cast<Instruction>(Opnd))
163       if (CastInst->isCast())
164         return CastInst;
165   }
166 
167   // The simple and common case. This also includes constant expressions.
168   if (!isa<PHINode>(Inst) && !Inst->isEHPad())
169     return Inst;
170 
171   // We can't insert directly before a phi node or an eh pad. Insert before
172   // the terminator of the incoming or dominating block.
173   assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");
174   if (Idx != ~0U && isa<PHINode>(Inst))
175     return cast<PHINode>(Inst)->getIncomingBlock(Idx)->getTerminator();
176 
177   // This must be an EH pad. Iterate over immediate dominators until we find a
178   // non-EH pad. We need to skip over catchswitch blocks, which are both EH pads
179   // and terminators.
180   auto IDom = DT->getNode(Inst->getParent())->getIDom();
181   while (IDom->getBlock()->isEHPad()) {
182     assert(Entry != IDom->getBlock() && "eh pad in entry block");
183     IDom = IDom->getIDom();
184   }
185 
186   return IDom->getBlock()->getTerminator();
187 }
188 
/// \brief Given \p BBs as input, find another set of BBs which collectively
/// dominates \p BBs and have the minimal sum of frequencies. Return the BB
/// set found in \p BBs.
///
/// \param DT    dominator tree of the function.
/// \param BFI   block frequencies used as the cost of inserting in a block.
/// \param Entry entry block of the function; must not be contained in \p BBs.
/// \param BBs   in: the blocks that need the constant; out: the chosen
///              insertion blocks (the set is cleared and refilled).
static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
                                 BasicBlock *Entry,
                                 SmallPtrSet<BasicBlock *, 8> &BBs) {
  assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
  // Nodes on the current path to the root.
  SmallPtrSet<BasicBlock *, 8> Path;
  // Candidates includes any block 'BB' in set 'BBs' that is not strictly
  // dominated by any other blocks in set 'BBs', and all nodes in the path
  // in the dominator tree from Entry to 'BB'.
  SmallPtrSet<BasicBlock *, 16> Candidates;
  for (auto BB : BBs) {
    Path.clear();
    // Walk up the dominator tree until Entry or another BB in BBs
    // is reached. Insert the nodes on the way to the Path.
    BasicBlock *Node = BB;
    // The "Path" is a candidate path to be added into Candidates set.
    bool isCandidate = false;
    do {
      Path.insert(Node);
      // Reaching Entry, or a node that already is a candidate, means the rest
      // of the way up to Entry is known to be free of other blocks from BBs.
      if (Node == Entry || Candidates.count(Node)) {
        isCandidate = true;
        break;
      }
      assert(DT.getNode(Node)->getIDom() &&
             "Entry doens't dominate current Node");
      Node = DT.getNode(Node)->getIDom()->getBlock();
    } while (!BBs.count(Node));

    // If isCandidate is false, Node is another Block in BBs dominating
    // current 'BB'. Drop the nodes on the Path.
    if (!isCandidate)
      continue;

    // Add nodes on the Path into Candidates.
    Candidates.insert(Path.begin(), Path.end());
  }

  // Sort the nodes in Candidates in top-down order and save the nodes
  // in Orders.
  // This is a breadth-first walk over the dominator tree starting at Entry
  // that only descends into children contained in Candidates; Orders serves
  // as both the work list and the result.
  unsigned Idx = 0;
  SmallVector<BasicBlock *, 16> Orders;
  Orders.push_back(Entry);
  while (Idx != Orders.size()) {
    BasicBlock *Node = Orders[Idx++];
    for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
      if (Candidates.count(ChildDomNode->getBlock()))
        Orders.push_back(ChildDomNode->getBlock());
    }
  }

  // Visit Orders in bottom-up order.
  using InsertPtsCostPair =
      std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>;

  // InsertPtsMap is a map from a BB to the best insertion points for the
  // subtree of BB (subtree not including the BB itself).
  DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
  // NOTE(review): references obtained through operator[] are kept alive below
  // while a further entry (for Parent) may be created; the up-front reserve
  // presumably exists so that this creation cannot grow the map and invalidate
  // them - confirm against DenseMap's invalidation rules before touching it.
  InsertPtsMap.reserve(Orders.size() + 1);
  for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
    BasicBlock *Node = *RIt;
    bool NodeInBBs = BBs.count(Node);
    SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
    BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;

    // Return the optimal insert points in BBs.
    // Entry is the first element of Orders and therefore the last one visited
    // here: either Entry alone is chosen, or the points collected from its
    // subtree are.
    if (Node == Entry) {
      BBs.clear();
      if (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1))
        BBs.insert(Entry);
      else
        BBs.insert(InsertPts.begin(), InsertPts.end());
      break;
    }

    BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
    // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
    // will update its parent's ParentInsertPts and ParentPtsFreq.
    SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
    BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
    // Choose to insert in Node or in subtree of Node.
    // Don't hoist to EHPad because we may not find a proper place to insert
    // in EHPad.
    // If the total frequency of InsertPts is the same as the frequency of the
    // target Node, and InsertPts contains more than one nodes, choose hoisting
    // to reduce code size.
    // A Node that is itself in BBs always becomes the insertion point for its
    // whole subtree.
    if (NodeInBBs ||
        (!Node->isEHPad() &&
         (InsertPtsFreq > BFI.getBlockFreq(Node) ||
          (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) {
      ParentInsertPts.insert(Node);
      ParentPtsFreq += BFI.getBlockFreq(Node);
    } else {
      ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
      ParentPtsFreq += InsertPtsFreq;
    }
  }
}
290 
291 /// \brief Find an insertion point that dominates all uses.
292 SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
293     const ConstantInfo &ConstInfo) const {
294   assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
295   // Collect all basic blocks.
296   SmallPtrSet<BasicBlock *, 8> BBs;
297   SmallPtrSet<Instruction *, 8> InsertPts;
298   for (auto const &RCI : ConstInfo.RebasedConstants)
299     for (auto const &U : RCI.Uses)
300       BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
301 
302   if (BBs.count(Entry)) {
303     InsertPts.insert(&Entry->front());
304     return InsertPts;
305   }
306 
307   if (BFI) {
308     findBestInsertionSet(*DT, *BFI, Entry, BBs);
309     for (auto BB : BBs) {
310       BasicBlock::iterator InsertPt = BB->begin();
311       for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
312         ;
313       InsertPts.insert(&*InsertPt);
314     }
315     return InsertPts;
316   }
317 
318   while (BBs.size() >= 2) {
319     BasicBlock *BB, *BB1, *BB2;
320     BB1 = *BBs.begin();
321     BB2 = *std::next(BBs.begin());
322     BB = DT->findNearestCommonDominator(BB1, BB2);
323     if (BB == Entry) {
324       InsertPts.insert(&Entry->front());
325       return InsertPts;
326     }
327     BBs.erase(BB1);
328     BBs.erase(BB2);
329     BBs.insert(BB);
330   }
331   assert((BBs.size() == 1) && "Expected only one element.");
332   Instruction &FirstInst = (*BBs.begin())->front();
333   InsertPts.insert(findMatInsertPt(&FirstInst));
334   return InsertPts;
335 }
336 
337 /// \brief Record constant integer ConstInt for instruction Inst at operand
338 /// index Idx.
339 ///
340 /// The operand at index Idx is not necessarily the constant integer itself. It
341 /// could also be a cast instruction or a constant expression that uses the
342 // constant integer.
343 void ConstantHoistingPass::collectConstantCandidates(
344     ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
345     ConstantInt *ConstInt) {
346   unsigned Cost;
347   // Ask the target about the cost of materializing the constant for the given
348   // instruction and operand index.
349   if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
350     Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
351                               ConstInt->getValue(), ConstInt->getType());
352   else
353     Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
354                               ConstInt->getType());
355 
356   // Ignore cheap integer constants.
357   if (Cost > TargetTransformInfo::TCC_Basic) {
358     ConstCandMapType::iterator Itr;
359     bool Inserted;
360     std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0));
361     if (Inserted) {
362       ConstCandVec.push_back(ConstantCandidate(ConstInt));
363       Itr->second = ConstCandVec.size() - 1;
364     }
365     ConstCandVec[Itr->second].addUser(Inst, Idx, Cost);
366     DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx)))
367             dbgs() << "Collect constant " << *ConstInt << " from " << *Inst
368                    << " with cost " << Cost << '\n';
369           else
370           dbgs() << "Collect constant " << *ConstInt << " indirectly from "
371                  << *Inst << " via " << *Inst->getOperand(Idx) << " with cost "
372                  << Cost << '\n';
373     );
374   }
375 }
376 
377 /// \brief Check the operand for instruction Inst at index Idx.
378 void ConstantHoistingPass::collectConstantCandidates(
379     ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {
380   Value *Opnd = Inst->getOperand(Idx);
381 
382   // Visit constant integers.
383   if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {
384     collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
385     return;
386   }
387 
388   // Visit cast instructions that have constant integers.
389   if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
390     // Only visit cast instructions, which have been skipped. All other
391     // instructions should have already been visited.
392     if (!CastInst->isCast())
393       return;
394 
395     if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {
396       // Pretend the constant is directly used by the instruction and ignore
397       // the cast instruction.
398       collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
399       return;
400     }
401   }
402 
403   // Visit constant expressions that have constant integers.
404   if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
405     // Only visit constant cast expressions.
406     if (!ConstExpr->isCast())
407       return;
408 
409     if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {
410       // Pretend the constant is directly used by the instruction and ignore
411       // the constant expression.
412       collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
413       return;
414     }
415   }
416 }
417 
418 /// \brief Scan the instruction for expensive integer constants and record them
419 /// in the constant candidate vector.
420 void ConstantHoistingPass::collectConstantCandidates(
421     ConstCandMapType &ConstCandMap, Instruction *Inst) {
422   // Skip all cast instructions. They are visited indirectly later on.
423   if (Inst->isCast())
424     return;
425 
426   // Scan all operands.
427   for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
428     // The cost of materializing the constants (defined in
429     // `TargetTransformInfo::getIntImmCost`) for instructions which only take
430     // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So
431     // it's safe for us to collect constant candidates from all IntrinsicInsts.
432     if (canReplaceOperandWithVariable(Inst, Idx) || isa<IntrinsicInst>(Inst)) {
433       collectConstantCandidates(ConstCandMap, Inst, Idx);
434     }
435   } // end of for all operands
436 }
437 
438 /// \brief Collect all integer constants in the function that cannot be folded
439 /// into an instruction itself.
440 void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
441   ConstCandMapType ConstCandMap;
442   for (BasicBlock &BB : Fn)
443     for (Instruction &Inst : BB)
444       collectConstantCandidates(ConstCandMap, &Inst);
445 }
446 
447 // This helper function is necessary to deal with values that have different
448 // bit widths (APInt Operator- does not like that). If the value cannot be
449 // represented in uint64 we return an "empty" APInt. This is then interpreted
450 // as the value is not in range.
451 static Optional<APInt> calculateOffsetDiff(const APInt &V1, const APInt &V2) {
452   Optional<APInt> Res = None;
453   unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
454                 V1.getBitWidth() : V2.getBitWidth();
455   uint64_t LimVal1 = V1.getLimitedValue();
456   uint64_t LimVal2 = V2.getLimitedValue();
457 
458   if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)
459     return Res;
460 
461   uint64_t Diff = LimVal1 - LimVal2;
462   return APInt(BW, Diff, true);
463 }
464 
// From a list of constants, one needs to be picked as the base and the other
// constants will be transformed into an offset from that base constant. The
// question is which one is the best pick? For example, consider these
// constants and their number of uses:
//
//  Constants| 2 | 4 | 12 | 42 |
//  NumUses  | 3 | 2 |  8 |  7 |
//
// Selecting constant 12 because it has the most uses will generate negative
// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative
// offsets lead to less optimal code generation, then there might be better
// solutions. Suppose immediates in the range of 0..35 are most optimally
// supported by the architecture, then selecting constant 2 is most optimal
// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in
// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would
// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in
// selecting the base constant the range of the offsets is a very important
// factor too that we take into account here. This algorithm calculates a total
// cost for selecting a constant as the base and subtracts the costs if
// immediates are out of range. It has quadratic complexity, so we call this
// function only when we're optimising for size and there are at most 100
// constants; we fall back to the straightforward algorithm otherwise,
// which does not do all the offset calculations.
//
// S and E delimit the candidates to consider. MaxCostItr is an in/out
// parameter: the straightforward algorithm compares against the candidate it
// refers to on entry, and on return it refers to the chosen base candidate.
// The return value is the total number of uses of all candidates in [S, E).
unsigned
ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
                                           ConstCandVecType::iterator E,
                                           ConstCandVecType::iterator &MaxCostItr) {
  unsigned NumUses = 0;

  // Straightforward algorithm: pick the candidate with the highest cumulative
  // cost.
  if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
    for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
      NumUses += ConstCand->Uses.size();
      if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
        MaxCostItr = ConstCand;
    }
    return NumUses;
  }

  DEBUG(dbgs() << "== Maximize constants in range ==\n");
  // Starts below any reachable value that is >= 0 so that the first candidate
  // with a non-negative cost is accepted.
  int MaxCost = -1;
  for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
    auto Value = ConstCand->ConstInt->getValue();
    Type *Ty = ConstCand->ConstInt->getType();
    int Cost = 0;
    NumUses += ConstCand->Uses.size();
    DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n");

    for (auto User : ConstCand->Uses) {
      // NOTE(review): the cost is queried by opcode here even for users that
      // are intrinsic calls, whereas the candidate collection queries those by
      // intrinsic ID - confirm that this difference is intended.
      unsigned Opcode = User.Inst->getOpcode();
      unsigned OpndIdx = User.OpndIdx;
      // Cost of materializing this candidate at this use ...
      Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
      DEBUG(dbgs() << "Cost: " << Cost << "\n");

      // ... reduced by the code size cost of the offset every candidate in the
      // range (including this one, with offset 0) would get relative to it.
      for (auto C2 = S; C2 != E; ++C2) {
        Optional<APInt> Diff = calculateOffsetDiff(
                                   C2->ConstInt->getValue(),
                                   ConstCand->ConstInt->getValue());
        if (Diff) {
          const int ImmCosts =
            TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
          Cost -= ImmCosts;
          DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
                       << "has penalty: " << ImmCosts << "\n"
                       << "Adjusted cost: " << Cost << "\n");
        }
      }
    }
    DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");
    // Strictly greater: on a tie the earlier candidate stays selected.
    if (Cost > MaxCost) {
      MaxCost = Cost;
      MaxCostItr = ConstCand;
      DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()
                   << "\n");
    }
  }
  return NumUses;
}
542 
543 /// \brief Find the base constant within the given range and rebase all other
544 /// constants with respect to the base constant.
545 void ConstantHoistingPass::findAndMakeBaseConstant(
546     ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
547   auto MaxCostItr = S;
548   unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
549 
550   // Don't hoist constants that have only one use.
551   if (NumUses <= 1)
552     return;
553 
554   ConstantInfo ConstInfo;
555   ConstInfo.BaseConstant = MaxCostItr->ConstInt;
556   Type *Ty = ConstInfo.BaseConstant->getType();
557 
558   // Rebase the constants with respect to the base constant.
559   for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
560     APInt Diff = ConstCand->ConstInt->getValue() -
561                  ConstInfo.BaseConstant->getValue();
562     Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);
563     ConstInfo.RebasedConstants.push_back(
564       RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
565   }
566   ConstantVec.push_back(std::move(ConstInfo));
567 }
568 
569 /// \brief Finds and combines constant candidates that can be easily
570 /// rematerialized with an add from a common base constant.
571 void ConstantHoistingPass::findBaseConstants() {
572   // Sort the constants by value and type. This invalidates the mapping!
573   std::sort(ConstCandVec.begin(), ConstCandVec.end(),
574             [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
575     if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
576       return LHS.ConstInt->getType()->getBitWidth() <
577              RHS.ConstInt->getType()->getBitWidth();
578     return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue());
579   });
580 
581   // Simple linear scan through the sorted constant candidate vector for viable
582   // merge candidates.
583   auto MinValItr = ConstCandVec.begin();
584   for (auto CC = std::next(ConstCandVec.begin()), E = ConstCandVec.end();
585        CC != E; ++CC) {
586     if (MinValItr->ConstInt->getType() == CC->ConstInt->getType()) {
587       // Check if the constant is in range of an add with immediate.
588       APInt Diff = CC->ConstInt->getValue() - MinValItr->ConstInt->getValue();
589       if ((Diff.getBitWidth() <= 64) &&
590           TTI->isLegalAddImmediate(Diff.getSExtValue()))
591         continue;
592     }
593     // We either have now a different constant type or the constant is not in
594     // range of an add with immediate anymore.
595     findAndMakeBaseConstant(MinValItr, CC);
596     // Start a new base constant search.
597     MinValItr = CC;
598   }
599   // Finalize the last base constant search.
600   findAndMakeBaseConstant(MinValItr, ConstCandVec.end());
601 }
602 
603 /// \brief Updates the operand at Idx in instruction Inst with the result of
604 ///        instruction Mat. If the instruction is a PHI node then special
605 ///        handling for duplicate values form the same incoming basic block is
606 ///        required.
607 /// \return The update will always succeed, but the return value indicated if
608 ///         Mat was used for the update or not.
609 static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
610   if (auto PHI = dyn_cast<PHINode>(Inst)) {
611     // Check if any previous operand of the PHI node has the same incoming basic
612     // block. This is a very odd case that happens when the incoming basic block
613     // has a switch statement. In this case use the same value as the previous
614     // operand(s), otherwise we will fail verification due to different values.
615     // The values are actually the same, but the variable names are different
616     // and the verifier doesn't like that.
617     BasicBlock *IncomingBB = PHI->getIncomingBlock(Idx);
618     for (unsigned i = 0; i < Idx; ++i) {
619       if (PHI->getIncomingBlock(i) == IncomingBB) {
620         Value *IncomingVal = PHI->getIncomingValue(i);
621         Inst->setOperand(Idx, IncomingVal);
622         return false;
623       }
624     }
625   }
626 
627   Inst->setOperand(Idx, Mat);
628   return true;
629 }
630 
/// \brief Emit materialization code for all rebased constants and update their
/// users.
///
/// \param Base      the hoisted base constant.
/// \param Offset    offset of the wanted constant from the base, or null when
///                  the user needs the base constant itself.
/// \param ConstUser the instruction/operand pair to rewrite.
void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
                                             Constant *Offset,
                                             const ConstantUser &ConstUser) {
  // Mat is the value that replaces the constant: the base itself, or
  // "Base + Offset" emitted at the user's materialization point.
  Instruction *Mat = Base;
  if (Offset) {
    Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
                                               ConstUser.OpndIdx);
    Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
                                 "const_mat", InsertionPt);

    DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
                 << " + " << *Offset << ") in BB "
                 << Mat->getParent()->getName() << '\n' << *Mat << '\n');
    Mat->setDebugLoc(ConstUser.Inst->getDebugLoc());
  }
  Value *Opnd = ConstUser.Inst->getOperand(ConstUser.OpndIdx);

  // Visit constant integer.
  if (isa<ConstantInt>(Opnd)) {
    DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
    // If the operand was updated with some other value (duplicate PHI entry),
    // the add created above has no user and is removed again.
    if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat) && Offset)
      Mat->eraseFromParent();
    DEBUG(dbgs() << "To    : " << *ConstUser.Inst << '\n');
    return;
  }

  // Visit cast instruction.
  if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
    assert(CastInst->isCast() && "Expected an cast instruction!");
    // Check if we already have visited this cast instruction before to avoid
    // unnecessary cloning.
    // NOTE(review): when a clone already exists, the Mat built for this call is
    // not used by anything in this branch and is not erased either - confirm
    // whether that can leave a dead add behind.
    Instruction *&ClonedCastInst = ClonedCastMap[CastInst];
    if (!ClonedCastInst) {
      ClonedCastInst = CastInst->clone();
      ClonedCastInst->setOperand(0, Mat);
      ClonedCastInst->insertAfter(CastInst);
      // Use the same debug location as the original cast instruction.
      ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());
      DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n'
                   << "To               : " << *ClonedCastInst << '\n');
    }

    DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
    updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ClonedCastInst);
    DEBUG(dbgs() << "To    : " << *ConstUser.Inst << '\n');
    return;
  }

  // Visit constant expression.
  if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
    // Turn the constant expression into a real instruction operating on Mat
    // and place it at the user's materialization point.
    Instruction *ConstExprInst = ConstExpr->getAsInstruction();
    ConstExprInst->setOperand(0, Mat);
    ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
                                                ConstUser.OpndIdx));

    // Use the same debug location as the instruction we are about to update.
    ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());

    DEBUG(dbgs() << "Create instruction: " << *ConstExprInst << '\n'
                 << "From              : " << *ConstExpr << '\n');
    DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
    // If the new instruction ended up unused (duplicate PHI entry), remove it
    // first and then the add it was the only user of.
    if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ConstExprInst)) {
      ConstExprInst->eraseFromParent();
      if (Offset)
        Mat->eraseFromParent();
    }
    DEBUG(dbgs() << "To    : " << *ConstUser.Inst << '\n');
    return;
  }
}
703 
704 /// \brief Hoist and hide the base constant behind a bitcast and emit
705 /// materialization code for derived constants.
706 bool ConstantHoistingPass::emitBaseConstants() {
707   bool MadeChange = false;
708   for (auto const &ConstInfo : ConstantVec) {
709     // Hoist and hide the base constant behind a bitcast.
710     SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
711     assert(!IPSet.empty() && "IPSet is empty");
712 
713     unsigned UsesNum = 0;
714     unsigned ReBasesNum = 0;
715     for (Instruction *IP : IPSet) {
716       IntegerType *Ty = ConstInfo.BaseConstant->getType();
717       Instruction *Base =
718           new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
719       DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
720                    << ") to BB " << IP->getParent()->getName() << '\n'
721                    << *Base << '\n');
722 
723       // Emit materialization code for all rebased constants.
724       unsigned Uses = 0;
725       for (auto const &RCI : ConstInfo.RebasedConstants) {
726         for (auto const &U : RCI.Uses) {
727           Uses++;
728           BasicBlock *OrigMatInsertBB =
729               findMatInsertPt(U.Inst, U.OpndIdx)->getParent();
730           // If Base constant is to be inserted in multiple places,
731           // generate rebase for U using the Base dominating U.
732           if (IPSet.size() == 1 ||
733               DT->dominates(Base->getParent(), OrigMatInsertBB)) {
734             emitBaseConstants(Base, RCI.Offset, U);
735             ReBasesNum++;
736           }
737         }
738       }
739       UsesNum = Uses;
740 
741       // Use the same debug location as the last user of the constant.
742       assert(!Base->use_empty() && "The use list is empty!?");
743       assert(isa<Instruction>(Base->user_back()) &&
744              "All uses should be instructions.");
745       Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
746     }
747     (void)UsesNum;
748     (void)ReBasesNum;
749     // Expect all uses are rebased after rebase is done.
750     assert(UsesNum == ReBasesNum && "Not all uses are rebased");
751 
752     NumConstantsHoisted++;
753 
754     // Base constant is also included in ConstInfo.RebasedConstants, so
755     // deduct 1 from ConstInfo.RebasedConstants.size().
756     NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
757 
758     MadeChange = true;
759   }
760   return MadeChange;
761 }
762 
763 /// \brief Check all cast instructions we made a copy of and remove them if they
764 /// have no more users.
765 void ConstantHoistingPass::deleteDeadCastInst() const {
766   for (auto const &I : ClonedCastMap)
767     if (I.first->use_empty())
768       I.first->eraseFromParent();
769 }
770 
771 /// \brief Optimize expensive integer constants in the given function.
772 bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
773                                    DominatorTree &DT, BlockFrequencyInfo *BFI,
774                                    BasicBlock &Entry) {
775   this->TTI = &TTI;
776   this->DT = &DT;
777   this->BFI = BFI;
778   this->Entry = &Entry;
779   // Collect all constant candidates.
780   collectConstantCandidates(Fn);
781 
782   // There are no constant candidates to worry about.
783   if (ConstCandVec.empty())
784     return false;
785 
786   // Combine constants that can be easily materialized with an add from a common
787   // base constant.
788   findBaseConstants();
789 
790   // There are no constants to emit.
791   if (ConstantVec.empty())
792     return false;
793 
794   // Finally hoist the base constant and emit materialization code for dependent
795   // constants.
796   bool MadeChange = emitBaseConstants();
797 
798   // Cleanup dead instructions.
799   deleteDeadCastInst();
800 
801   return MadeChange;
802 }
803 
804 PreservedAnalyses ConstantHoistingPass::run(Function &F,
805                                             FunctionAnalysisManager &AM) {
806   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
807   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
808   auto BFI = ConstHoistWithBlockFrequency
809                  ? &AM.getResult<BlockFrequencyAnalysis>(F)
810                  : nullptr;
811   if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
812     return PreservedAnalyses::all();
813 
814   PreservedAnalyses PA;
815   PA.preserveSet<CFGAnalyses>();
816   return PA;
817 }
818