1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ShrinkWrapping class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include <numeric>
17 #include <stack>
18 
19 #define DEBUG_TYPE "shrinkwrapping"
20 
21 using namespace llvm;
22 
23 namespace opts {
24 
25 extern cl::opt<bool> TimeOpts;
26 extern cl::OptionCategory BoltOptCategory;
27 
28 static cl::opt<unsigned> ShrinkWrappingThreshold(
29     "shrink-wrapping-threshold",
30     cl::desc("Percentage of prologue execution count to use as threshold when"
31              " evaluating whether a block is cold enough to be profitable to"
32              " move eligible spills there"),
33     cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
34 } // namespace opts
35 
36 namespace llvm {
37 namespace bolt {
38 
39 void CalleeSavedAnalysis::analyzeSaves() {
40   ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
41   StackReachingUses &SRU = Info.getStackReachingUses();
42   auto &InsnToBB = Info.getInsnToBBMap();
43   BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);
44 
45   LLVM_DEBUG(dbgs() << "Checking spill locations\n");
46   for (BinaryBasicBlock &BB : BF) {
47     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
48     const MCInst *Prev = nullptr;
49     for (MCInst &Inst : BB) {
50       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
51         // Blacklist weird stores we don't understand
52         if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
53             FIE->IsStoreFromReg) {
54           BlacklistedRegs.set(FIE->RegOrImm);
55           CalleeSaved.reset(FIE->RegOrImm);
56           Prev = &Inst;
57           continue;
58         }
59 
60         if (!FIE->IsStore || !FIE->IsStoreFromReg ||
61             BlacklistedRegs[FIE->RegOrImm]) {
62           Prev = &Inst;
63           continue;
64         }
65 
66         // If this reg is defined locally, it is not a callee-saved reg
67         if (RD.isReachedBy(FIE->RegOrImm,
68                            Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
69           BlacklistedRegs.set(FIE->RegOrImm);
70           CalleeSaved.reset(FIE->RegOrImm);
71           Prev = &Inst;
72           continue;
73         }
74 
75         // If this stack position is accessed in another function, we are
76         // probably dealing with a parameter passed in a stack -- do not mess
77         // with it
78         if (SRU.isStoreUsed(*FIE,
79                             Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)),
80             /*IncludeLocalAccesses=*/false) {
81           BlacklistedRegs.set(FIE->RegOrImm);
82           CalleeSaved.reset(FIE->RegOrImm);
83           Prev = &Inst;
84           continue;
85         }
86 
87         // If this stack position is loaded elsewhere in another reg, we can't
88         // update it, so blacklist it.
89         if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
90                                                   : SRU.expr_begin(BB))) {
91           BlacklistedRegs.set(FIE->RegOrImm);
92           CalleeSaved.reset(FIE->RegOrImm);
93           Prev = &Inst;
94           continue;
95         }
96 
97         // Ignore regs with multiple saves
98         if (CalleeSaved[FIE->RegOrImm]) {
99           BlacklistedRegs.set(FIE->RegOrImm);
100           CalleeSaved.reset(FIE->RegOrImm);
101           Prev = &Inst;
102           continue;
103         }
104 
105         CalleeSaved.set(FIE->RegOrImm);
106         SaveFIEByReg[FIE->RegOrImm] = &*FIE;
107         SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
108         BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
109         OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
110         LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
111                           << FIE->RegOrImm << "\n");
112       }
113       Prev = &Inst;
114     }
115   }
116 }
117 
/// Walk each block backwards looking for loads that restore previously
/// identified callee-saved registers. Matching restores are annotated with
/// getRestoreTag(); registers whose restores are inconsistent with their
/// saves (wrong offset, non-simple, or followed by a local use) are
/// dismissed from CalleeSaved.
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    // Because we iterate in reverse, Prev is the instruction *after* Inst in
    // layout order; it anchors the reaching-uses query below.
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between saves/store don't agree with each other,
        // we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        // Only the first matching restore (last in layout order) is recorded
        // as the canonical FIE for this register.
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}
167 
168 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
169   std::vector<MCInst *> Results;
170   for (BinaryBasicBlock &BB : BF)
171     for (MCInst &Inst : BB)
172       if (getSavedReg(Inst) == Reg)
173         Results.push_back(&Inst);
174   return Results;
175 }
176 
177 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
178   std::vector<MCInst *> Results;
179   for (BinaryBasicBlock &BB : BF)
180     for (MCInst &Inst : BB)
181       if (getRestoredReg(Inst) == Reg)
182         Results.push_back(&Inst);
183   return Results;
184 }
185 
186 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
187   for (BinaryBasicBlock &BB : BF) {
188     for (MCInst &Inst : BB) {
189       BC.MIB->removeAnnotation(Inst, getSaveTag());
190       BC.MIB->removeAnnotation(Inst, getRestoreTag());
191     }
192   }
193 }
194 
195 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
196   if (BlacklistedRegions[Offset] < Size)
197     BlacklistedRegions[Offset] = Size;
198 }
199 
200 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
201   for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
202     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
203       return true;
204   return false;
205 }
206 
207 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
208                                                      int64_t Size) {
209   bool HasConflict = false;
210   for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
211     std::pair<const int64_t, int64_t> &Elem = *Iter;
212     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
213         (Offset != Elem.first || Size != Elem.second)) {
214       Iter = AvailableRegions.erase(Iter);
215       HasConflict = true;
216       continue;
217     }
218     ++Iter;
219   }
220   if (HasConflict) {
221     blacklistRegion(Offset, Size);
222     return true;
223   }
224   return false;
225 }
226 
/// Inspect an instruction that defines the frame pointer. If the value it
/// produces differs from the stack pointer value at that point, this is not
/// a regular frame setup and the whole frame is blacklisted.
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Only instructions that (possibly implicitly) define FP are interesting.
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  // SP/FP offsets known immediately before this point.
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  // Bind (register, value) pairs for the evaluator; an unknown value
  // (EMPTY/SUPERPOSITION) is encoded as the (0, 0) pair.
  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  // Bail if the instruction is too complex to evaluate symbolically.
  int64_t Output;
  if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  // blacklistRegion(0, 0) acts as a sentinel: any non-empty blacklist makes
  // canCollapseRegion/canInsertRegion reject all layout changes.
  if (Output != SPVal)
    blacklistRegion(0, 0);
}
256 
/// Inspect an instruction that defines the stack pointer using the frame
/// pointer as an input (e.g. restoring SP from FP in the epilogue). If it
/// restores SP to an old value, mark it with getSlotTag() so the value can
/// be adjusted after layout changes; if it performs an allocation through
/// FP, blacklist the frame entirely.
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
  unsigned NumDefs = InstInfo.getNumDefs();
  bool UsesFP = false;
  // Scan the use operands (they follow the defs) looking for FP.
  for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
       ++I) {
    MCOperand &Operand = Point.getOperand(I);
    if (!Operand.isReg())
      continue;
    if (Operand.getReg() == BC.MIB->getFramePointer()) {
      UsesFP = true;
      break;
    }
  }
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  // Unknown values (EMPTY/SUPERPOSITION) are encoded as the (0, 0) pair.
  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  // Bail if the instruction is too complex to evaluate symbolically.
  int64_t Output;
  if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
    return;

  // If the value is the same of FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  // (blacklistRegion(0, 0) is a sentinel disabling all layout changes).
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}
314 
/// Classify every stack access in the function: blacklist regions we cannot
/// safely move (non-simple accesses, stores of non-register values, slots
/// visible to other functions, conflicting overlaps) and record the rest in
/// AvailableRegions, annotating each access with getSlotTag().
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    // Because we iterate in reverse, Prev is the instruction *after* Inst in
    // layout order; it anchors the stack-reaching-uses query below.
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      // Accesses we don't fully understand poison their region.
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed in a stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as available stack slot which we know how
      // to move it.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}
363 
/// Walk CFIs in layout order, tracking the CFA register/offset state
/// machine, and annotate stack-slot-relative CFIs with getSlotTag() so later
/// offset adjustments can patch them. CFA rules based on registers other
/// than SP/FP make the function non-simple.
void StackLayoutModifier::classifyCFIs() {
  // Saved (CfaOffset, CfaReg) states for OpRememberState/OpRestoreState.
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  // Initial CFA state: offset -8 from DWARF register 7 -- presumably the
  // x86-64 SysV entry state (CFA = RSP + 8, DWARF reg 7 = RSP); TODO confirm
  // if porting to another target.
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    // Only CFAs based on SP or FP describe slots we can track; anything else
    // disqualifies the function.
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *&BB : BF.layout()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        // Offsets are stored negated relative to the CFI encoding.
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        // DefCfa updates both offset and register; fall through to pick up
        // the register change.
        LLVM_FALLTHROUGH;
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        // Remember which register this rule describes, for later rewriting.
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        // These are not expected to appear in the input at this stage.
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}
431 
432 void StackLayoutModifier::scheduleChange(
433     MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
434   auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
435       Inst, getTodoTag(), AllocatorId);
436   WList.push_back(Item);
437 }
438 
439 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
440   if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
441     return false;
442 
443   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
444   if (!FIE)
445     return false;
446 
447   return canCollapseRegion(FIE->StackOffset);
448 }
449 
450 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
451   if (!IsInitialized)
452     initialize();
453   if (!IsSimple)
454     return false;
455 
456   if (CollapsedRegions.count(RegionAddr))
457     return true;
458 
459   // Check if it is possible to readjust all accesses below RegionAddr
460   if (!BlacklistedRegions.empty())
461     return false;
462 
463   return true;
464 }
465 
466 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
467   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
468   if (!FIE)
469     return false;
470   int64_t RegionAddr = FIE->StackOffset;
471   int64_t RegionSz = FIE->Size;
472   return collapseRegion(DeletedPush, RegionAddr, RegionSz);
473 }
474 
/// Schedule the offset adjustments needed to remove the stack region of size
/// \p RegionSz at \p RegionAddr allocated by \p Alloc (e.g. a push that is
/// being deleted). Returns false if the region cannot be collapsed.
bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      // Only accesses classified earlier (annotated with the slot tag and
      // recorded as available) are candidates for adjustment.
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        // CFIs describing slots above the deleted region keep their offsets.
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      // Accesses to the deleted slot itself are marked here and retargeted
      // later by setOffsetForCollapsedAccesses().
      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      // Pushes/pops are not rewritten here -- presumably their offsets are
      // implied by SP itself; confirm against performChanges().
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      // SP-relative accesses below the region and FP-relative accesses above
      // it are unaffected by removing the region.
      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}
533 
534 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
535   for (BinaryBasicBlock &BB : BF) {
536     for (MCInst &Inst : BB) {
537       if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
538         continue;
539       BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
540       scheduleChange(
541           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
542     }
543   }
544 }
545 
546 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
547   if (!IsInitialized)
548     initialize();
549   if (!IsSimple)
550     return false;
551 
552   StackPointerTracking &SPT = Info.getStackPointerTracking();
553   int64_t RegionAddr = SPT.getStateBefore(P)->first;
554   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
555     return false;
556 
557   if (InsertedRegions.count(RegionAddr))
558     return true;
559 
560   // Check if we are going to screw up stack accesses at call sites that
561   // pass parameters via stack
562   if (!BlacklistedRegions.empty())
563     return false;
564 
565   return true;
566 }
567 
/// Schedule the offset adjustments needed to insert a new stack region of
/// size \p RegionSz at program point \p P. Only accesses dominated by \p P
/// are adjusted. Returns false if insertion is not possible.
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      // Only accesses classified earlier (annotated with the slot tag and
      // recorded as available) are candidates for adjustment.
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      // Accesses not dominated by the insertion point cannot be affected by
      // the new allocation.
      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        // CFIs describing slots at or above the insertion point keep their
        // offsets.
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        // SP update based on frame pointer; shift by the inserted size.
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      // SP-relative accesses below the insertion point and FP-relative
      // accesses at or above it are unaffected by the new region.
      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      // Pushes/pops are not rewritten here -- presumably their offsets are
      // implied by SP itself; confirm against performChanges().
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}
624 
/// Apply all scheduled WorklistItem adjustments: rewrite CFI offsets and
/// recreate loads/stores with updated stack offsets to account for collapsed
/// or inserted stack regions.
void StackLayoutModifier::performChanges() {
  // Each CFI index may be referenced by several instructions; rewrite it at
  // most once.
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      // Any leftover marker must be on a push/pop (whose offsets need no
      // rewriting); just drop it.
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      // Sum all adjustments scheduled for this instruction; all items must
      // request the same kind of action.
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        // CFI adjustment path.
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        // DefCfa/DefCfaOffset slots were recorded negated in classifyCFIs,
        // so the adjustment sign must be flipped for them.
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      // Load/store adjustment path: decode the stack access so we can
      // recreate it with a new offset.
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      // SP-relative accesses move in the opposite direction from FP-relative
      // ones when the layout shifts.
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        Success = BC.MIB->createRestoreFromStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        Success = BC.MIB->createSaveToStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
      assert(Success);
    }
  }
}
706 
/// Run the one-time classification of stack accesses and CFIs. Called
/// lazily by canCollapseRegion/canInsertRegion before any layout query.
void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}
712 
713 uint64_t ShrinkWrapping::SpillsMovedRegularMode = 0;
714 uint64_t ShrinkWrapping::SpillsMovedPushPopMode = 0;
715 
716 using BBIterTy = BinaryBasicBlock::iterator;
717 
/// For every callee-saved register, record the set of program points (as
/// dataflow expression indices) where the register is touched, excluding its
/// own save and restore instructions. When the function uses a frame
/// pointer, every call is additionally counted as a use of any CSR that
/// aliases FP.
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // One bit-per-instruction use set per register.
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I = BV.find_first(); I != -1; I = BV.find_next(I)) {
        // Register number 0 -- presumably the invalid register -- is skipped.
        if (I == 0)
          continue;
        // The save/restore themselves do not count as uses.
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      // With a frame pointer, treat calls as uses of FP-aliasing CSRs.
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I = BV.find_first(); I > 0; I = BV.find_next(I))
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}
747 
748 void ShrinkWrapping::pruneUnwantedCSRs() {
749   BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
750   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
751     if (!CSA.CalleeSaved[I])
752       continue;
753     if (ParamRegs[I]) {
754       CSA.CalleeSaved.reset(I);
755       continue;
756     }
757     if (UsesByReg[I].empty()) {
758       LLVM_DEBUG(
759           dbgs()
760           << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
761           << "\n");
762       CSA.CalleeSaved.reset(I);
763       continue;
764     }
765     if (!CSA.HasRestores[I]) {
766       LLVM_DEBUG(
767           dbgs() << "Dismissing Callee-Saved Reg because it does not have "
768                     "restores:"
769                  << I << "\n");
770       CSA.CalleeSaved.reset(I);
771     }
772   }
773 }
774 
/// Compute candidate save positions for each callee-saved register. Phase 1:
/// a block's first instruction is a valid save position for a register if
/// the block is not inside a loop, the stack state there is known, and that
/// instruction dominates every recorded use of the register. Phase 2: among
/// the candidates, pick the one with the lowest profiled execution count as
/// BestSavePos/BestSaveCount.
void ShrinkWrapping::computeSaveLocations() {
  SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      // Collect the uses of reg I that this block's entry dominates.
      // NOTE(review): the `J > 0` condition also skips expression index 0 --
      // presumably benign/intentional; confirm against DA indexing.
      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J = UsesByReg[I].find_first(); J > 0;
           J = UsesByReg[I].find_next(J))
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      // Only a position dominating *all* uses is a legal save point.
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        SavePos[I].insert(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J = UsesByReg[I].find_first(); J > 0;
               J = UsesByReg[I].find_next(J)) {
            dbgs() << "Idx " << J << ": ";
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  // Phase 2: choose, per register, the candidate with the lowest profiled
  // execution count (i.e. the coldest legal save position).
  BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(),
                                        std::numeric_limits<uint64_t>::max());
  BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr);
  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    for (MCInst *Pos : SavePos[I]) {
      BinaryBasicBlock *BB = InsnToBB[Pos];
      uint64_t Count = BB->getExecutionCount();
      if (Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Count < BestSaveCount[I]) {
        BestSavePos[I] = Pos;
        BestSaveCount[I] = Count;
      }
    }
  }
}
849 
850 void ShrinkWrapping::computeDomOrder() {
851   std::vector<MCPhysReg> Order;
852   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
853     Order.push_back(I);
854   }
855 
856   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
857   auto &InsnToBB = Info.getInsnToBBMap();
858   std::sort(Order.begin(), Order.end(),
859             [&](const MCPhysReg &A, const MCPhysReg &B) {
860               BinaryBasicBlock *BBA =
861                   BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr;
862               BinaryBasicBlock *BBB =
863                   BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr;
864               if (BBA == BBB)
865                 return A < B;
866               if (!BBA && BBB)
867                 return false;
868               if (BBA && !BBB)
869                 return true;
870               if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B]))
871                 return true;
872               if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A]))
873                 return false;
874               return A < B;
875             });
876 
877   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
878     DomOrder[Order[I]] = I;
879 }
880 
881 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
882                                        uint64_t &TotalEstimatedWin) {
883   const uint64_t CurSavingCost = CSA.SavingCost[CSR];
884   if (!CSA.CalleeSaved[CSR])
885     return false;
886 
887   uint64_t BestCount = BestSaveCount[CSR];
888   BestPosSave = BestSavePos[CSR];
889   bool ShouldMove = false;
890   if (BestCount != std::numeric_limits<uint64_t>::max() &&
891       BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) {
892     LLVM_DEBUG({
893       auto &InsnToBB = Info.getInsnToBBMap();
894       dbgs() << "Better position for saves found in func " << BF.getPrintName()
895              << " count << " << BF.getKnownExecutionCount() << "\n";
896       dbgs() << "Reg: " << CSR
897              << "; New BB: " << InsnToBB[BestPosSave]->getName()
898              << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
899     });
900     TotalEstimatedWin += CurSavingCost - BestCount;
901     ShouldMove = true;
902   }
903 
904   if (!ShouldMove)
905     return false;
906   if (!BestPosSave) {
907     LLVM_DEBUG({
908       dbgs() << "Dropping opportunity because we don't know where to put "
909                 "stores -- total est. freq reduc: "
910              << TotalEstimatedWin << "\n";
911     });
912     return false;
913   }
914   return true;
915 }
916 
/// Auxiliar function used to create basic blocks for critical edges and update
/// the dominance frontier with these new locations
///
/// For each frontier entry flagged in \p IsCritEdge, splits every edge from
/// \p From[I] to each block in \p To[I], and repoints (or appends to) the
/// \p Frontier so it refers to the newly created split-edge blocks.
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating with blocks with multiple destinations.
  // Note: Frontier is addressed by index (never by cached reference) because
  // push_back below may reallocate its storage.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates instructions
      // and not BBs).
      // The nop exists only to carry annotations; it is scheduled for
      // deletion right away.
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier inplace
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}
969 
/// Compute the program points where restores for \p CSR must be placed if
/// its save is moved to \p BestPosSave. The result is derived from the
/// dominance frontier of \p BestPosSave, with edges split where needed.
/// Returns an empty vector if placement is not possible.
/// \p TotalEstimatedWin is used here only for debug reporting.
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  bool CannotPlace = false;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull these
  // restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    // A frontier terminator that itself touches CSR leaves us no safe spot
    // in that block for the restore.
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR))
      CannotPlace = true;
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    // Start this frontier block with an empty destination list.
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which indicates
    // the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      LLVM_DEBUG(
          dbgs() << "Bailing on restore placement to avoid LP splitting\n");
      Frontier.clear();
      return Frontier;
    }
    // Successors NOT dominated by the save need a restore on the edge to
    // them; if any successor IS dominated, this frontier block has mixed
    // successors and its outgoing edges must be split.
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *&BB : BF.layout()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  // accumulate() counts flagged entries: nonzero means at least one frontier
  // block needs edge splitting.
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  if (CannotPlace) {
    LLVM_DEBUG({
      dbgs() << "Dropping opportunity because restore placement failed"
                " -- total est. freq reduc: "
             << TotalEstimatedWin << "\n";
    });
    Frontier.clear();
    return Frontier;
  }
  return Frontier;
}
1067 
1068 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
1069                                           int64_t SaveOffset) {
1070   if (FA.requiresAlignment(BF)) {
1071     LLVM_DEBUG({
1072       dbgs() << "Reg " << CSR
1073              << " is not using push/pops due to function "
1074                 "alignment requirements.\n";
1075     });
1076     return false;
1077   }
1078   for (MCInst *Save : CSA.getSavesByReg(CSR)) {
1079     if (!SLM.canCollapseRegion(Save)) {
1080       LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
1081       return false;
1082     }
1083   }
1084   // Abort if one of the restores for this CSR is not a POP.
1085   for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
1086     if (!BC.MIB->isPop(*Load)) {
1087       LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
1088       return false;
1089     }
1090   }
1091 
1092   StackPointerTracking &SPT = Info.getStackPointerTracking();
1093   // Abort if we are inserting a push into an entry BB (offset -8) and this
1094   // func sets up a frame pointer.
1095   if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
1096       SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
1097     LLVM_DEBUG({
1098       dbgs() << "Reg " << CSR
1099              << " cannot insert region or we are "
1100                 "trying to insert a push into entry bb.\n";
1101     });
1102     return false;
1103   }
1104   return true;
1105 }
1106 
/// Adjust each restore point so a POP can be used: the insertion point must
/// sit where the SP offset equals \p SaveOffset and no use of \p CSR is
/// reachable from it. Returns the adjusted points, or an empty vector to
/// signal a fallback to load/store mode.
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    // First try the very end of the block: SP must match SaveOffset and no
    // use of CSR may be backward-reachable from there.
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    // Otherwise scan the block backwards for the latest instruction whose
    // pre-state has the right SP offset and no reachable CSR use.
    for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
      if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(*RIt);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &*RIt;
          break;
        }
      }
    }
    // Any single failure aborts push/pop mode for this CSR entirely.
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}
1149 
/// Schedule the removal of the original save/restore instructions for \p CSR
/// (or their conversion to plain SP adjustments when \p UsePushPops is
/// false), together with the CFI instructions attached to them.
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *&BB : BF.layout()) {
    // Walk backwards so that, when we reach a save/restore instruction, CFIs
    // holds exactly the CFI instructions that follow it in program order.
    std::vector<MCInst *> CFIs;
    for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      // Not a save/restore of this CSR: pending CFIs belong to it, not to
      // us, so drop them and keep scanning.
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      // Erase in push/pop mode; otherwise keep the stack adjustment effect
      // but drop the memory access.
      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      // Record the deleted CFI offsets only once per CSR (first save and
      // first restore encountered) so they can be replayed later.
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}
1209 
1210 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
1211   if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
1212       CSA.getRestoredReg(Inst) == CSR)
1213     return false;
1214   BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
1215   BC.MIB->getTouchedRegs(Inst, BV);
1216   return BV[CSR];
1217 }
1218 
/// Schedule insertion of the new save at \p BestPosSave and of restores at
/// each point in \p RestorePoints for \p CSR, using push/pop instructions
/// when \p UsePushPops is true and load/store otherwise.
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    // Hoist the restore before any terminator at/after this point so it is
    // always executed on the fall-through path.
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    // Do not insert between a prefix and the instruction it modifies.
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    // If the chosen instruction uses CSR (or follows a prefix), attach the
    // restore to the end of its BB instead of before the instruction.
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}
1274 
/// Driver for moving saves/restores of all profitable CSRs: picks the best
/// cold save position per register, computes restore placements, schedules
/// the edits, and reverts to load/store mode function-wide if push/pop mode
/// failed for any register.
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;

  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t TotalEstimatedWin = 0;
    if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin))
      continue;
    SmallVector<ProgramPoint, 4> RestorePoints =
        doRestorePlacement(BestPosSave, I, TotalEstimatedWin);
    if (RestorePoints.empty())
      continue;

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
  }

  // Revert push-pop mode if it failed for a single CSR
  // All CSRs must agree on the mode, so rewrite previously scheduled
  // push/pop items (both BB-level and per-instruction annotations) into
  // load/store equivalents.
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
        MCInst &Inst = *I;
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          // Scheduled erasures of non-CFI (push/pop) instructions must keep
          // their SP adjustment effect in load/store mode.
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}
1376 
1377 namespace {
1378 /// Helper function to identify whether two basic blocks created by splitting
1379 /// a critical edge have the same contents.
1380 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
1381                             const BinaryBasicBlock &B) {
1382   if (A.succ_size() != B.succ_size())
1383     return false;
1384   if (A.succ_size() != 1)
1385     return false;
1386 
1387   if (*A.succ_begin() != *B.succ_begin())
1388     return false;
1389 
1390   if (A.size() != B.size())
1391     return false;
1392 
1393   // Compare instructions
1394   auto I = A.begin(), E = A.end();
1395   auto OtherI = B.begin(), OtherE = B.end();
1396   while (I != E && OtherI != OtherE) {
1397     if (I->getOpcode() != OtherI->getOpcode())
1398       return false;
1399     if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
1400           return true;
1401         }))
1402       return false;
1403     ++I;
1404     ++OtherI;
1405   }
1406   return true;
1407 }
1408 } // namespace
1409 
/// Merge split-edge BBs with identical contents: redirect the predecessor of
/// each duplicate onto the first identical block and mark the duplicate for
/// removal from the CFG. Returns true if any block was folded.
bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  // Quadratic scan, but restricted to blocks named ".LSplitEdge" (created by
  // our own edge splitting).
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().startswith(".LSplitEdge"))
      continue;
    // Walk candidates from the back, stopping at BB so each pair is
    // considered only once.
    for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
      BinaryBasicBlock &RBB = *RIter;
      if (&RBB == &BB)
        break;
      if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      // Retarget jump-table entries first, then the CFG successor edge,
      // preserving the original profile counts.
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}
1437 
namespace {

// A special StackPointerTracking that compensates for our future plans
// in removing/adding insn.
//
// It consumes the WorklistItem annotations produced by ShrinkWrapping and
// adjusts the tracked SP offset as if the scheduled erasures/insertions had
// already been applied, so later passes see the post-transformation stack
// state.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  // Map of scheduled BB-level changes, shared with ShrinkWrapping.
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  Optional<unsigned> AnnotationIndex;

protected:
  // Apply the SP-offset effect of the scheduled \p TodoItems for \p Point on
  // top of \p Res. SP offsets are tracked as negative values, so erasing a
  // push raises the offset and erasing a pop lowers it; scheduled
  // push-insertions mirror the opposite.
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  // Standard transfer function plus compensation: first the base SP
  // tracking, then per-instruction scheduled items, then (only when Point is
  // the last instruction of its BB) the BB-level items from TodoMap.
  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    // Unknown states stay unknown; no compensation applies.
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace
1521 
/// Insert CFI instructions reflecting the new save location of \p CSR.
/// \p SPValPush / \p SPValPop are the SP offsets to encode at the push and
/// pop boundaries of the "affected zone" (the region dominated by the new
/// save point).
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  // Locate the (new) save instruction for CSR: a simple store of that reg to
  // the stack. Blocks are scanned bottom-up; the first match wins.
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
         ++InstIter) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
                                 Reg, SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &*InstIter;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  // Walk the layout and emit CFIs at every transition into/out of the zone
  // dominated by SavePoint, and at block boundaries where the zone state
  // differs from the previous block (or at the hot-cold split point).
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        // Insert after the transition instruction; the iterator dance keeps
        // InstIter valid across the insertion.
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      // Zone state changed across a fall-through boundary: patch the end of
      // the previous block.
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}
1594 
/// Rebuild all OpDefCfaOffset CFIs for the function from the current
/// StackPointerTracking state: mark the stale ones for deletion, emit fresh
/// ones at every SP-value change, then erase the marked ones.
void ShrinkWrapping::rebuildCFIForSP() {
  // Phase 1: tag every existing CFA-offset CFI; they reflect the old layout.
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  // Phase 2: walk the layout and emit a CFA-offset CFI wherever the tracked
  // SP value changes. -8 is the SP value at function entry (return address
  // on the stack), so it seeds PrevSPVal.
  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        // Insert after the instruction that changed SP; CFA offset is the
        // negation of the tracked (negative) SP value.
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    // Re-state the CFA offset at the hot-cold split boundary; otherwise fix
    // up the end of the previous block when the value changed across the
    // fall-through.
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      // NOTE(review): PrevBB is null on the first non-empty block; this
      // branch presumes the first block always starts at SP value -8
      // (matching the initial PrevSPVal) so the null case never fires --
      // verify.
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  // Phase 3: physically erase the CFIs tagged in phase 1.
  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}
1645 
1646 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
1647                                          const FrameIndexEntry &FIE,
1648                                          bool CreatePushOrPop) {
1649   MCInst NewInst;
1650   if (SPVal != StackPointerTracking::SUPERPOSITION &&
1651       SPVal != StackPointerTracking::EMPTY) {
1652     if (FIE.IsLoad) {
1653       if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
1654                                           FIE.StackOffset - SPVal, FIE.RegOrImm,
1655                                           FIE.Size)) {
1656         errs() << "createRestoreFromStack: not supported on this platform\n";
1657         abort();
1658       }
1659     } else {
1660       if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
1661                                      FIE.StackOffset - SPVal, FIE.RegOrImm,
1662                                      FIE.Size)) {
1663         errs() << "createSaveToStack: not supported on this platform\n";
1664         abort();
1665       }
1666     }
1667     if (CreatePushOrPop)
1668       BC.MIB->changeToPushOrPop(NewInst);
1669     return NewInst;
1670   }
1671   assert(FPVal != StackPointerTracking::SUPERPOSITION &&
1672          FPVal != StackPointerTracking::EMPTY);
1673 
1674   if (FIE.IsLoad) {
1675     if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
1676                                         FIE.StackOffset - FPVal, FIE.RegOrImm,
1677                                         FIE.Size)) {
1678       errs() << "createRestoreFromStack: not supported on this platform\n";
1679       abort();
1680     }
1681   } else {
1682     if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
1683                                    FIE.StackOffset - FPVal, FIE.RegOrImm,
1684                                    FIE.Size)) {
1685       errs() << "createSaveToStack: not supported on this platform\n";
1686       abort();
1687     }
1688   }
1689   return NewInst;
1690 }
1691 
1692 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
1693   const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1694   if (UpdatedCFIs.count(CFI))
1695     return;
1696 
1697   switch (CFI->getOperation()) {
1698   case MCCFIInstruction::OpDefCfa:
1699   case MCCFIInstruction::OpDefCfaRegister:
1700   case MCCFIInstruction::OpDefCfaOffset:
1701     CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
1702     break;
1703   case MCCFIInstruction::OpOffset:
1704   default:
1705     break;
1706   }
1707 
1708   UpdatedCFIs.insert(CFI);
1709 }
1710 
1711 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
1712                                                 BBIterTy Pos, unsigned Reg,
1713                                                 bool isPush, int Sz,
1714                                                 int64_t NewOffset) {
1715   if (isPush) {
1716     for (uint32_t Idx : DeletedPushCFIs[Reg]) {
1717       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1718       updateCFIInstOffset(*Pos++, NewOffset);
1719     }
1720     if (HasDeletedOffsetCFIs[Reg]) {
1721       Pos = BF.addCFIInstruction(
1722           &BB, Pos,
1723           MCCFIInstruction::createOffset(
1724               nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
1725       ++Pos;
1726     }
1727   } else {
1728     for (uint32_t Idx : DeletedPopCFIs[Reg]) {
1729       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1730       updateCFIInstOffset(*Pos++, NewOffset);
1731     }
1732     if (HasDeletedOffsetCFIs[Reg]) {
1733       Pos = BF.addCFIInstruction(
1734           &BB, Pos,
1735           MCCFIInstruction::createSameValue(
1736               nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
1737       ++Pos;
1738     }
1739   }
1740   return Pos;
1741 }
1742 
/// Materialize a single insertion request (\p Item) in \p CurBB at
/// \p InsertionPoint. \p SPVal and \p FPVal are the stack- and frame-pointer
/// values at this program point, used to compute the access displacement.
/// Returns an iterator past the newly inserted instruction (or CurBB->end()
/// when the instruction was appended).
BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                          BinaryBasicBlock *CurBB,
                                          const WorklistItem &Item,
                                          int64_t SPVal, int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        // A push writes below the current SP: the saved value will live at
        // SPVal minus the push size.
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        // A pop reads from the current SP.
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      // Plain load/store: the slot is the FIE's fixed stack offset.
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  MCInst NewInst =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  // Insert before the given instruction, or append when the insertion point
  // is the end of the block.
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}
1790 
/// Process all insertion requests in \p TodoList at \p InsertionPoint of
/// \p CurBB. Erase/adjustment items are skipped (handled by
/// processDeletions). \p SPVal / \p FPVal are the stack- and frame-pointer
/// values reported by PredictiveStackPointerTracking at this point. Returns
/// the iterator past the last inserted instruction.
BBIterTy ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  // Fast path: nothing to insert in this list.
  bool HasInsertions = false;
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;
    HasInsertions = true;
    break;
  }

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location, we want SP Value before
  // insertions. (PSPT already accounts for the push/pops we are about to
  // insert at the end of the block, so undo their SP effect here.)
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them.
  // The comparator keeps all non-POP items before POP items (stable for
  // equals), and orders POPs by decreasing DomOrder of the affected reg.
  std::stable_sort(TodoList.begin(), TodoList.end(),
                   [&](const WorklistItem &A, const WorklistItem &B) {
                     // Neither is a POP insertion: keep relative order.
                     if ((A.Action != WorklistItem::InsertPushOrPop ||
                          !A.FIEToInsert.IsLoad) &&
                         (B.Action != WorklistItem::InsertPushOrPop ||
                          !B.FIEToInsert.IsLoad))
                       return false;
                     // Only A is a non-POP: it sorts first.
                     if ((A.Action != WorklistItem::InsertPushOrPop ||
                          !A.FIEToInsert.IsLoad))
                       return true;
                     // Only B is a non-POP: it sorts first.
                     if ((B.Action != WorklistItem::InsertPushOrPop ||
                          !B.FIEToInsert.IsLoad))
                       return false;
                     return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
                   });

  // Process insertions, tracking the SP value as each push/pop takes effect.
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    InsertionPoint =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}
1859 
/// Walk the whole function and materialize every insertion recorded either
/// as a per-instruction annotation (insert before that instruction) or in
/// the per-block Todo map (insert at block end). Returns true if anything
/// was inserted.
bool ShrinkWrapping::processInsertions() {
  // PSPT models the SP/FP state as if all pending insertions were already
  // applied, which is what processInsertionsList expects.
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      // Copy the list: insertions may invalidate the annotation's storage.
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      // State *before* Inst: either the block entry (ProgramPoint on the BB
      // itself) or the state at the previous instruction.
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
                            SPTState.second);
      Changes = true;
    }
  }
  return Changes;
}
1896 
1897 void ShrinkWrapping::processDeletions() {
1898   LivenessAnalysis &LA = Info.getLivenessAnalysis();
1899   for (BinaryBasicBlock &BB : BF) {
1900     for (auto II = BB.begin(); II != BB.end(); ++II) {
1901       MCInst &Inst = *II;
1902       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1903           Inst, getAnnotationIndex());
1904       if (!TodoList)
1905         continue;
1906       // Process all deletions
1907       for (WorklistItem &Item : *TodoList) {
1908         if (Item.Action != WorklistItem::Erase &&
1909             Item.Action != WorklistItem::ChangeToAdjustment)
1910           continue;
1911 
1912         if (Item.Action == WorklistItem::ChangeToAdjustment) {
1913           // Is flag reg alive across this func?
1914           bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
1915           if (int Sz = BC.MIB->getPushSize(Inst)) {
1916             BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
1917             continue;
1918           }
1919           if (int Sz = BC.MIB->getPopSize(Inst)) {
1920             BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
1921             continue;
1922           }
1923         }
1924 
1925         LLVM_DEBUG({
1926           dbgs() << "Erasing: ";
1927           BC.printInstruction(dbgs(), Inst);
1928         });
1929         II = std::prev(BB.eraseInstruction(II));
1930         break;
1931       }
1932     }
1933   }
1934 }
1935 
1936 void ShrinkWrapping::rebuildCFI() {
1937   const bool FP = Info.getStackPointerTracking().HasFramePointer;
1938   Info.invalidateAll();
1939   if (!FP) {
1940     rebuildCFIForSP();
1941     Info.invalidateAll();
1942   }
1943   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1944     if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
1945       continue;
1946     const int64_t SPValPush = PushOffsetByReg[I];
1947     const int64_t SPValPop = PopOffsetByReg[I];
1948     insertUpdatedCFI(I, SPValPush, SPValPop);
1949     Info.invalidateAll();
1950   }
1951 }
1952 
1953 bool ShrinkWrapping::perform() {
1954   HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
1955   PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1956   PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1957   DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
1958 
1959   if (BF.checkForAmbiguousJumpTables()) {
1960     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
1961                       << ".\n");
1962     // We could call disambiguateJumpTables here, but it is probably not worth
1963     // the cost (of duplicating potentially large jump tables that could regress
1964     // dcache misses). Moreover, ambiguous JTs are rare and coming from code
1965     // written in assembly language. Just bail.
1966     return false;
1967   }
1968   SLM.initialize();
1969   CSA.compute();
1970   classifyCSRUses();
1971   pruneUnwantedCSRs();
1972   computeSaveLocations();
1973   computeDomOrder();
1974   moveSaveRestores();
1975   LLVM_DEBUG({
1976     dbgs() << "Func before shrink-wrapping: \n";
1977     BF.dump();
1978   });
1979   SLM.performChanges();
1980   // Early exit if processInsertions doesn't detect any todo items
1981   if (!processInsertions())
1982     return false;
1983   processDeletions();
1984   if (foldIdenticalSplitEdges()) {
1985     const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
1986     (void)Stats;
1987     LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
1988                       << " redundant split edge BBs (" << Stats.second
1989                       << " bytes) for " << BF.getPrintName() << "\n");
1990   }
1991   rebuildCFI();
1992   // We may have split edges, creating BBs that need correct branching
1993   BF.fixBranches();
1994   LLVM_DEBUG({
1995     dbgs() << "Func after shrink-wrapping: \n";
1996     BF.dump();
1997   });
1998   return true;
1999 }
2000 
2001 void ShrinkWrapping::printStats() {
2002   outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
2003          << " spills inserting load/stores and " << SpillsMovedPushPopMode
2004          << " spills inserting push/pops\n";
2005 }
2006 
2007 // Operators necessary as a result of using MCAnnotation
2008 raw_ostream &operator<<(raw_ostream &OS,
2009                         const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
2010   OS << "SWTodo[";
2011   const char *Sep = "";
2012   for (const ShrinkWrapping::WorklistItem &Item : Vec) {
2013     OS << Sep;
2014     switch (Item.Action) {
2015     case ShrinkWrapping::WorklistItem::Erase:
2016       OS << "Erase";
2017       break;
2018     case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
2019       OS << "ChangeToAdjustment";
2020       break;
2021     case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
2022       OS << "InsertLoadOrStore";
2023       break;
2024     case ShrinkWrapping::WorklistItem::InsertPushOrPop:
2025       OS << "InsertPushOrPop";
2026       break;
2027     }
2028     Sep = ", ";
2029   }
2030   OS << "]";
2031   return OS;
2032 }
2033 
2034 raw_ostream &
2035 operator<<(raw_ostream &OS,
2036            const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
2037   OS << "SLMTodo[";
2038   const char *Sep = "";
2039   for (const StackLayoutModifier::WorklistItem &Item : Vec) {
2040     OS << Sep;
2041     switch (Item.Action) {
2042     case StackLayoutModifier::WorklistItem::None:
2043       OS << "None";
2044       break;
2045     case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
2046       OS << "AdjustLoadStoreOffset";
2047       break;
2048     case StackLayoutModifier::WorklistItem::AdjustCFI:
2049       OS << "AdjustCFI";
2050       break;
2051     }
2052     Sep = ", ";
2053   }
2054   OS << "]";
2055   return OS;
2056 }
2057 
2058 bool operator==(const ShrinkWrapping::WorklistItem &A,
2059                 const ShrinkWrapping::WorklistItem &B) {
2060   return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
2061           A.Adjustment == B.Adjustment &&
2062           A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
2063           A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
2064           A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
2065           A.FIEToInsert.Size == B.FIEToInsert.Size &&
2066           A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
2067           A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
2068 }
2069 
2070 bool operator==(const StackLayoutModifier::WorklistItem &A,
2071                 const StackLayoutModifier::WorklistItem &B) {
2072   return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
2073 }
2074 
2075 } // end namespace bolt
2076 } // end namespace llvm
2077